commit afda4610a8c1ed9fe3adc86c6fc1b08b5fdae7aa Author: Owen O'Malley Date: Fri May 13 14:44:34 2016 -0700 HIVE-9660 Add length to ORC indexes so that the reader knows how much to read. diff --git orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java index 24715c3..7e82fda 100644 --- orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java +++ orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java @@ -6715,6 +6715,20 @@ public Builder clearHasNull() { * optional .orc.proto.ColumnStatistics statistics = 2; */ org.apache.orc.OrcProto.ColumnStatisticsOrBuilder getStatisticsOrBuilder(); + + // repeated uint32 lengths = 3 [packed = true]; + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + java.util.List getLengthsList(); + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + int getLengthsCount(); + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + int getLengths(int index); } /** * Protobuf type {@code orc.proto.RowIndexEntry} @@ -6801,6 +6815,27 @@ private RowIndexEntry( bitField0_ |= 0x00000001; break; } + case 24: { + if (!((mutable_bitField0_ & 0x00000004) == 0x00000004)) { + lengths_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000004; + } + lengths_.add(input.readUInt32()); + break; + } + case 26: { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000004) == 0x00000004) && input.getBytesUntilLimit() > 0) { + lengths_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000004; + } + while (input.getBytesUntilLimit() > 0) { + lengths_.add(input.readUInt32()); + } + input.popLimit(limit); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -6812,6 +6847,9 @@ private RowIndexEntry( if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { positions_ = java.util.Collections.unmodifiableList(positions_); } + if (((mutable_bitField0_ & 0x00000004) == 0x00000004)) { + lengths_ = 
java.util.Collections.unmodifiableList(lengths_); + } this.unknownFields = unknownFields.build(); makeExtensionsImmutable(); } @@ -6890,9 +6928,34 @@ public boolean hasStatistics() { return statistics_; } + // repeated uint32 lengths = 3 [packed = true]; + public static final int LENGTHS_FIELD_NUMBER = 3; + private java.util.List lengths_; + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public java.util.List + getLengthsList() { + return lengths_; + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public int getLengthsCount() { + return lengths_.size(); + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public int getLengths(int index) { + return lengths_.get(index); + } + private int lengthsMemoizedSerializedSize = -1; + private void initFields() { positions_ = java.util.Collections.emptyList(); statistics_ = org.apache.orc.OrcProto.ColumnStatistics.getDefaultInstance(); + lengths_ = java.util.Collections.emptyList(); } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -6916,6 +6979,13 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (((bitField0_ & 0x00000001) == 0x00000001)) { output.writeMessage(2, statistics_); } + if (getLengthsList().size() > 0) { + output.writeRawVarint32(26); + output.writeRawVarint32(lengthsMemoizedSerializedSize); + } + for (int i = 0; i < lengths_.size(); i++) { + output.writeUInt32NoTag(lengths_.get(i)); + } getUnknownFields().writeTo(output); } @@ -6943,6 +7013,20 @@ public int getSerializedSize() { size += com.google.protobuf.CodedOutputStream .computeMessageSize(2, statistics_); } + { + int dataSize = 0; + for (int i = 0; i < lengths_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeUInt32SizeNoTag(lengths_.get(i)); + } + size += dataSize; + if (!getLengthsList().isEmpty()) { + size += 1; + size += com.google.protobuf.CodedOutputStream + .computeInt32SizeNoTag(dataSize); + } + lengthsMemoizedSerializedSize 
= dataSize; + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -7068,6 +7152,8 @@ public Builder clear() { statisticsBuilder_.clear(); } bitField0_ = (bitField0_ & ~0x00000002); + lengths_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000004); return this; } @@ -7109,6 +7195,11 @@ public Builder clone() { } else { result.statistics_ = statisticsBuilder_.build(); } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + lengths_ = java.util.Collections.unmodifiableList(lengths_); + bitField0_ = (bitField0_ & ~0x00000004); + } + result.lengths_ = lengths_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -7138,6 +7229,16 @@ public Builder mergeFrom(org.apache.orc.OrcProto.RowIndexEntry other) { if (other.hasStatistics()) { mergeStatistics(other.getStatistics()); } + if (!other.lengths_.isEmpty()) { + if (lengths_.isEmpty()) { + lengths_ = other.lengths_; + bitField0_ = (bitField0_ & ~0x00000004); + } else { + ensureLengthsIsMutable(); + lengths_.addAll(other.lengths_); + } + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -7348,6 +7449,72 @@ public Builder clearStatistics() { return statisticsBuilder_; } + // repeated uint32 lengths = 3 [packed = true]; + private java.util.List lengths_ = java.util.Collections.emptyList(); + private void ensureLengthsIsMutable() { + if (!((bitField0_ & 0x00000004) == 0x00000004)) { + lengths_ = new java.util.ArrayList(lengths_); + bitField0_ |= 0x00000004; + } + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public java.util.List + getLengthsList() { + return java.util.Collections.unmodifiableList(lengths_); + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public int getLengthsCount() { + return lengths_.size(); + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public int getLengths(int index) { + return lengths_.get(index); + } + /** + * repeated uint32 lengths = 
3 [packed = true]; + */ + public Builder setLengths( + int index, int value) { + ensureLengthsIsMutable(); + lengths_.set(index, value); + onChanged(); + return this; + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public Builder addLengths(int value) { + ensureLengthsIsMutable(); + lengths_.add(value); + onChanged(); + return this; + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public Builder addAllLengths( + java.lang.Iterable values) { + ensureLengthsIsMutable(); + super.addAll(values, lengths_); + onChanged(); + return this; + } + /** + * repeated uint32 lengths = 3 [packed = true]; + */ + public Builder clearLengths() { + lengths_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000004); + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:orc.proto.RowIndexEntry) } @@ -19088,56 +19255,57 @@ public Builder setMagicBytes( "Statistics\0225\n\020binaryStatistics\030\010 \001(\0132\033.o" + "rc.proto.BinaryStatistics\022;\n\023timestampSt" + "atistics\030\t \001(\0132\036.orc.proto.TimestampStat" + - "istics\022\017\n\007hasNull\030\n \001(\010\"W\n\rRowIndexEntry" + + "istics\022\017\n\007hasNull\030\n \001(\010\"l\n\rRowIndexEntry" + "\022\025\n\tpositions\030\001 \003(\004B\002\020\001\022/\n\nstatistics\030\002 " + - "\001(\0132\033.orc.proto.ColumnStatistics\"3\n\010RowI" + - "ndex\022\'\n\005entry\030\001 \003(\0132\030.orc.proto.RowIndex" + - "Entry\"7\n\013BloomFilter\022\030\n\020numHashFunctions" + - "\030\001 \001(\r\022\016\n\006bitset\030\002 \003(\006\"?\n\020BloomFilterInd", - "ex\022+\n\013bloomFilter\030\001 \003(\0132\026.orc.proto.Bloo" + - "mFilter\"\325\001\n\006Stream\022$\n\004kind\030\001 \001(\0162\026.orc.p" + - "roto.Stream.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006leng" + - "th\030\003 \001(\004\"\204\001\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001" + - 
"\022\n\n\006LENGTH\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DIC" + - "TIONARY_COUNT\020\004\022\r\n\tSECONDARY\020\005\022\r\n\tROW_IN" + - "DEX\020\006\022\020\n\014BLOOM_FILTER\020\007\"\234\001\n\016ColumnEncodi" + - "ng\022,\n\004kind\030\001 \001(\0162\036.orc.proto.ColumnEncod" + - "ing.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind" + - "\022\n\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V", - "2\020\002\022\021\n\rDICTIONARY_V2\020\003\"v\n\014StripeFooter\022\"" + - "\n\007streams\030\001 \003(\0132\021.orc.proto.Stream\022*\n\007co" + - "lumns\030\002 \003(\0132\031.orc.proto.ColumnEncoding\022\026" + - "\n\016writerTimezone\030\003 \001(\t\"\341\002\n\004Type\022\"\n\004kind\030" + - "\001 \001(\0162\024.orc.proto.Type.Kind\022\024\n\010subtypes\030" + - "\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmaximum" + - "Length\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n\005scale" + - "\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t" + - "\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022" + - "\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tT", - "IMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020" + - "\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022\013\n\007V" + - "ARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInformation\022" + - "\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\n" + - "dataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024" + - "\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem" + - "\022\014\n\004name\030\001 \001(\t\022\r\n\005value\030\002 \001(\014\"A\n\020StripeS" + - "tatistics\022-\n\010colStats\030\001 \003(\0132\033.orc.proto." 
+ - "ColumnStatistics\"<\n\010Metadata\0220\n\013stripeSt" + - "ats\030\001 \003(\0132\033.orc.proto.StripeStatistics\"\222", - "\002\n\006Footer\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rconte" + - "ntLength\030\002 \001(\004\022-\n\007stripes\030\003 \003(\0132\034.orc.pr" + - "oto.StripeInformation\022\036\n\005types\030\004 \003(\0132\017.o" + - "rc.proto.Type\022-\n\010metadata\030\005 \003(\0132\033.orc.pr" + - "oto.UserMetadataItem\022\024\n\014numberOfRows\030\006 \001" + - "(\004\022/\n\nstatistics\030\007 \003(\0132\033.orc.proto.Colum" + - "nStatistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\305\001\n\n" + - "PostScript\022\024\n\014footerLength\030\001 \001(\004\022/\n\013comp" + - "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" + - "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi", - "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" + - "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" + - "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" + - "APPY\020\002\022\007\n\003LZO\020\003B\020\n\016org.apache.orc" + "\001(\0132\033.orc.proto.ColumnStatistics\022\023\n\007leng" + + "ths\030\003 \003(\rB\002\020\001\"3\n\010RowIndex\022\'\n\005entry\030\001 \003(\013" + + "2\030.orc.proto.RowIndexEntry\"7\n\013BloomFilte" + + "r\022\030\n\020numHashFunctions\030\001 \001(\r\022\016\n\006bitset\030\002 ", + "\003(\006\"?\n\020BloomFilterIndex\022+\n\013bloomFilter\030\001" + + " \003(\0132\026.orc.proto.BloomFilter\"\325\001\n\006Stream\022" + + "$\n\004kind\030\001 \001(\0162\026.orc.proto.Stream.Kind\022\016\n" + + "\006column\030\002 \001(\r\022\016\n\006length\030\003 \001(\004\"\204\001\n\004Kind\022\013" + + "\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LENGTH\020\002\022\023\n\017DIC" + + "TIONARY_DATA\020\003\022\024\n\020DICTIONARY_COUNT\020\004\022\r\n\t" + + 
"SECONDARY\020\005\022\r\n\tROW_INDEX\020\006\022\020\n\014BLOOM_FILT" + + "ER\020\007\"\234\001\n\016ColumnEncoding\022,\n\004kind\030\001 \001(\0162\036." + + "orc.proto.ColumnEncoding.Kind\022\026\n\016diction" + + "arySize\030\002 \001(\r\"D\n\004Kind\022\n\n\006DIRECT\020\000\022\016\n\nDIC", + "TIONARY\020\001\022\r\n\tDIRECT_V2\020\002\022\021\n\rDICTIONARY_V" + + "2\020\003\"v\n\014StripeFooter\022\"\n\007streams\030\001 \003(\0132\021.o" + + "rc.proto.Stream\022*\n\007columns\030\002 \003(\0132\031.orc.p" + + "roto.ColumnEncoding\022\026\n\016writerTimezone\030\003 " + + "\001(\t\"\341\002\n\004Type\022\"\n\004kind\030\001 \001(\0162\024.orc.proto.T" + + "ype.Kind\022\024\n\010subtypes\030\002 \003(\rB\002\020\001\022\022\n\nfieldN" + + "ames\030\003 \003(\t\022\025\n\rmaximumLength\030\004 \001(\r\022\021\n\tpre" + + "cision\030\005 \001(\r\022\r\n\005scale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007" + + "BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022" + + "\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRI", + "NG\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020" + + "\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DEC" + + "IMAL\020\016\022\010\n\004DATE\020\017\022\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"" + + "x\n\021StripeInformation\022\016\n\006offset\030\001 \001(\004\022\023\n\013" + + "indexLength\030\002 \001(\004\022\022\n\ndataLength\030\003 \001(\004\022\024\n" + + "\014footerLength\030\004 \001(\004\022\024\n\014numberOfRows\030\005 \001(" + + "\004\"/\n\020UserMetadataItem\022\014\n\004name\030\001 \001(\t\022\r\n\005v" + + "alue\030\002 \001(\014\"A\n\020StripeStatistics\022-\n\010colSta" + + "ts\030\001 \003(\0132\033.orc.proto.ColumnStatistics\"<\n" + + "\010Metadata\0220\n\013stripeStats\030\001 \003(\0132\033.orc.pro", + 
"to.StripeStatistics\"\222\002\n\006Footer\022\024\n\014header" + + "Length\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022-\n\007s" + + "tripes\030\003 \003(\0132\034.orc.proto.StripeInformati" + + "on\022\036\n\005types\030\004 \003(\0132\017.orc.proto.Type\022-\n\010me" + + "tadata\030\005 \003(\0132\033.orc.proto.UserMetadataIte" + + "m\022\024\n\014numberOfRows\030\006 \001(\004\022/\n\nstatistics\030\007 " + + "\003(\0132\033.orc.proto.ColumnStatistics\022\026\n\016rowI" + + "ndexStride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014foote" + + "rLength\030\001 \001(\004\022/\n\013compression\030\002 \001(\0162\032.orc" + + ".proto.CompressionKind\022\034\n\024compressionBlo", + "ckSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016met" + + "adataLength\030\005 \001(\004\022\025\n\rwriterVersion\030\006 \001(\r" + + "\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004N" + + "ONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\020\n\016" + + "org.apache.orc" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -19203,7 +19371,7 @@ public Builder setMagicBytes( internal_static_orc_proto_RowIndexEntry_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_orc_proto_RowIndexEntry_descriptor, - new java.lang.String[] { "Positions", "Statistics", }); + new java.lang.String[] { "Positions", "Statistics", "Lengths", }); internal_static_orc_proto_RowIndex_descriptor = getDescriptor().getMessageTypes().get(10); internal_static_orc_proto_RowIndex_fieldAccessorTable = new diff --git orc/src/java/org/apache/orc/OrcFile.java orc/src/java/org/apache/orc/OrcFile.java index 7dd7333..089ee6a 100644 --- orc/src/java/org/apache/orc/OrcFile.java +++ orc/src/java/org/apache/orc/OrcFile.java @@ -529,7 
+529,7 @@ public static WriterOptions writerOptions(Properties tableProperties, private static ThreadLocal memoryManager = null; - private static synchronized MemoryManager getStaticMemoryManager( + static synchronized MemoryManager getStaticMemoryManager( final Configuration conf) { if (memoryManager == null) { memoryManager = new ThreadLocal() { diff --git orc/src/java/org/apache/orc/impl/BitFieldWriter.java orc/src/java/org/apache/orc/impl/BitFieldWriter.java index aa5f886..04d9bb9 100644 --- orc/src/java/org/apache/orc/impl/BitFieldWriter.java +++ orc/src/java/org/apache/orc/impl/BitFieldWriter.java @@ -17,10 +17,6 @@ */ package org.apache.orc.impl; -import org.apache.orc.impl.PositionRecorder; -import org.apache.orc.impl.PositionedOutputStream; -import org.apache.orc.impl.RunLengthByteWriter; - import java.io.IOException; public class BitFieldWriter { @@ -70,4 +66,8 @@ public void getPosition(PositionRecorder recorder) throws IOException { output.getPosition(recorder); recorder.addPosition(8 - bitsLeft); } + + public void registerCallback(PositionedOutputStream.CompressionCallback cb) { + output.registerCallback(cb); + } } diff --git orc/src/java/org/apache/orc/impl/IntegerWriter.java orc/src/java/org/apache/orc/impl/IntegerWriter.java index 419054f..9b4058c 100644 --- orc/src/java/org/apache/orc/impl/IntegerWriter.java +++ orc/src/java/org/apache/orc/impl/IntegerWriter.java @@ -44,4 +44,9 @@ * @throws IOException */ void flush() throws IOException; + + /** + * Register a callback for when the next compression is done. 
+ */ + void registerCallback(OutStream.CompressionCallback callback); } diff --git orc/src/java/org/apache/orc/impl/MemoryManager.java orc/src/java/org/apache/orc/impl/MemoryManager.java index 757c0b4..80d867e 100644 --- orc/src/java/org/apache/orc/impl/MemoryManager.java +++ orc/src/java/org/apache/orc/impl/MemoryManager.java @@ -154,6 +154,16 @@ public void removeWriter(Path path) throws IOException { } /** + * Clear the writers that are currently registered. + * This is intended for test cases. + */ + public void clearAll() throws IOException { + checkOwner(); + writerList.clear(); + updateScale(false); + } + + /** * Get the total pool size that is available for ORC writers. * @return the number of bytes in the pool */ diff --git orc/src/java/org/apache/orc/impl/OutStream.java orc/src/java/org/apache/orc/impl/OutStream.java index 81662cc..1a16115 100644 --- orc/src/java/org/apache/orc/impl/OutStream.java +++ orc/src/java/org/apache/orc/impl/OutStream.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; public class OutStream extends PositionedOutputStream { @@ -65,6 +67,7 @@ private final CompressionCodec codec; private long compressedBytes = 0; private long uncompressedBytes = 0; + private final List callbacks = new ArrayList<>(); public OutStream(String name, int bufferSize, @@ -77,6 +80,21 @@ public OutStream(String name, this.suppress = false; } + /** + * Register a callback for when the next compression buffer is completed; it is invoked immediately when there is no codec or the current buffer holds no data. + * @param callback the method to call when the block is done.
+ */ + @Override + public void registerCallback(CompressionCallback callback) { + if (codec == null) { + callback.compressionDone(uncompressedBytes); + } else if (current == null || current.position() == HEADER_SIZE) { + callback.compressionDone(compressedBytes); + } else { + callbacks.add(callback); + } + } + public void clear() throws IOException { flush(); suppress = false; @@ -155,6 +173,13 @@ public void write(byte[] bytes, int offset, int length) throws IOException { } } + private void doCallbacks(long position) { + for(CompressionCallback callback: callbacks) { + callback.compressionDone(position); + } + callbacks.clear(); + } + private void spill() throws java.io.IOException { // if there isn't anything in the current buffer, don't spill if (current == null || @@ -185,6 +210,7 @@ private void spill() throws java.io.IOException { } compressedBytes += totalBytes + HEADER_SIZE; writeHeader(compressed, sizePosn, totalBytes, false); + doCallbacks(compressedBytes); // if we have less than the next header left, spill it. if (compressed.remaining() < HEADER_SIZE) { compressed.flip(); @@ -194,6 +220,7 @@ private void spill() throws java.io.IOException { } } else { compressedBytes += uncompressedBytes + HEADER_SIZE; + doCallbacks(compressedBytes); uncompressedBytes = 0; // we are using the original, but need to spill the current // compressed buffer first. So back up to where we started, diff --git orc/src/java/org/apache/orc/impl/PositionedOutputStream.java orc/src/java/org/apache/orc/impl/PositionedOutputStream.java index d412939..55e5ac0 100644 --- orc/src/java/org/apache/orc/impl/PositionedOutputStream.java +++ orc/src/java/org/apache/orc/impl/PositionedOutputStream.java @@ -22,6 +22,13 @@ public abstract class PositionedOutputStream extends OutputStream { + public interface CompressionCallback { + /** + * Called each time a compression block is finished; position is the stream byte count reported at that point. + */ + void compressionDone(long position); + } + /** * Record the current position to the recorder.
* @param recorder the object that receives the position @@ -36,4 +43,10 @@ public abstract void getPosition(PositionRecorder recorder * @return the number of bytes used by buffers. */ public abstract long getBufferSize(); + + /** + * Register a callback for when the next compression buffer is completed. + * @param callback the method to call when the block is done. + */ + public abstract void registerCallback(CompressionCallback callback); } diff --git orc/src/java/org/apache/orc/impl/RecordReaderUtils.java orc/src/java/org/apache/orc/impl/RecordReaderUtils.java index 1067957..11a916e 100644 --- orc/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ orc/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -248,14 +248,21 @@ public static void addRgFilteredStreamToRanges(OrcProto.Stream stream, if (!includedRowGroups[group]) continue; int posn = getIndexPosition( encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull); - long start = index.getEntry(group).getPositions(posn); - final long nextGroupOffset; - boolean isLast = group == (includedRowGroups.length - 1); - nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn); + OrcProto.RowIndexEntry entry = index.getEntry(group); + long start = entry.getPositions(posn); start += offset; - long end = offset + estimateRgEndOffset( - isCompressed, isLast, nextGroupOffset, length, compressionSize); + long end; + if (entry.getLengthsCount() != 0) { + end = start + entry.getLengths(getLengthPosition(encoding.getKind(), + type.getKind(), stream.getKind(), hasNull)); + } else { + long nextGroupOffset; + boolean isLast = group == (includedRowGroups.length - 1); + nextGroupOffset = isLast ? 
length : index.getEntry(group + 1).getPositions(posn); + end = offset + estimateRgEndOffset( + isCompressed, isLast, nextGroupOffset, length, compressionSize); + } list.addOrMerge(start, end, doMergeBuffers, true); } } @@ -342,6 +349,70 @@ public static int getIndexPosition(OrcProto.ColumnEncoding.Kind columnEncoding, } } + /** + * Get the index in the column's length list at which the length of the + * given stream is stored. + * @param columnEncoding the encoding of the column + * @param columnType the type of the column + * @param streamType the kind of the stream + * @param hasNulls does the column have a PRESENT stream? + * @return the index to pass to RowIndexEntry.getLengths for that stream + */ + public static int getLengthPosition(OrcProto.ColumnEncoding.Kind columnEncoding, + OrcProto.Type.Kind columnType, + OrcProto.Stream.Kind streamType, + boolean hasNulls) { + if (streamType == OrcProto.Stream.Kind.PRESENT) { + return 0; + } + int base = hasNulls ? 1 : 0; + switch (columnType) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case DATE: + case STRUCT: + case MAP: + case LIST: + case UNION: + return base; + case CHAR: + case VARCHAR: + case STRING: + if (columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY || + columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) { + return base; + } else { + if (streamType == OrcProto.Stream.Kind.DATA) { + return base; + } else { + return base + 1; + } + } + case BINARY: + if (streamType == OrcProto.Stream.Kind.DATA) { + return base; + } + return base + 1; + case DECIMAL: + if (streamType == OrcProto.Stream.Kind.DATA) { + return base; + } + return base + 1; + case TIMESTAMP: + if (streamType == OrcProto.Stream.Kind.DATA) { + return base; + } + return base + 1; + default: + throw new IllegalArgumentException("Unknown type " + columnType); + } + } + // for uncompressed streams, what is the most overlap with the following set // of rows (long vint literal group).
static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512; diff --git orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java index 09108b2..c72cfe2 100644 --- orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java +++ orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java @@ -18,6 +18,8 @@ package org.apache.orc.impl; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; /** * A streamFactory that writes a sequence of bytes. A control byte is written before @@ -33,6 +35,8 @@ private int numLiterals = 0; private boolean repeat = false; private int tailRunLength = 0; + private List callbacks = + new ArrayList<>(); public RunLengthByteWriter(PositionedOutputStream output) { this.output = output; @@ -51,6 +55,10 @@ private void writeValues() throws IOException { tailRunLength = 0; numLiterals = 0; } + for(PositionedOutputStream.CompressionCallback cb: callbacks) { + output.registerCallback(cb); + } + callbacks.clear(); } public void flush() throws IOException { @@ -103,4 +111,12 @@ public void getPosition(PositionRecorder recorder) throws IOException { output.getPosition(recorder); recorder.addPosition(numLiterals); } + + public void registerCallback(PositionedOutputStream.CompressionCallback cb) { + if (numLiterals == 0) { + output.registerCallback(cb); + } else { + callbacks.add(cb); + } + } } diff --git orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java index 3e5f2e2..3563523 100644 --- orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java +++ orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java @@ -18,6 +18,8 @@ package org.apache.orc.impl; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; /** * A streamFactory that writes a sequence of integers. 
A control byte is written before @@ -39,6 +41,8 @@ private boolean repeat = false; private int tailRunLength = 0; private SerializationUtils utils; + private final List callbacks = + new ArrayList<>(); public RunLengthIntegerWriter(PositionedOutputStream output, boolean signed) { @@ -70,6 +74,10 @@ private void writeValues() throws IOException { repeat = false; numLiterals = 0; tailRunLength = 0; + for(OutStream.CompressionCallback cb: callbacks) { + output.registerCallback(cb); + } + callbacks.clear(); } } @@ -80,6 +88,15 @@ public void flush() throws IOException { } @Override + public void registerCallback(OutStream.CompressionCallback callback) { + if (numLiterals == 0) { + output.registerCallback(callback); + } else { + callbacks.add(callback); + } + } + + @Override public void write(long value) throws IOException { if (numLiterals == 0) { literals[numLiterals++] = value; @@ -139,5 +156,4 @@ public void getPosition(PositionRecorder recorder) throws IOException { output.getPosition(recorder); recorder.addPosition(numLiterals); } - } diff --git orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java index fab2801..c3d10a2 100644 --- orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java +++ orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java @@ -18,6 +18,8 @@ package org.apache.orc.impl; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; /** * A writer that performs light weight compression over sequence of integers.
@@ -159,6 +161,8 @@ private boolean isFixedDelta; private SerializationUtils utils; private boolean alignedBitpacking; + private final List callbacks = + new ArrayList<>(); RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed) { this(output, signed, true); @@ -188,6 +192,10 @@ private void writeValues() throws IOException { // clear all the variables clear(); + for(PositionedOutputStream.CompressionCallback cb: callbacks) { + output.registerCallback(cb); + } + callbacks.clear(); } } @@ -716,6 +724,15 @@ public void flush() throws IOException { } @Override + public void registerCallback(OutStream.CompressionCallback callback) { + if (numLiterals == 0) { + output.registerCallback(callback); + } else { + callbacks.add(callback); + } + } + + @Override public void write(long val) throws IOException { if (numLiterals == 0) { initializeLiterals(val); diff --git orc/src/java/org/apache/orc/impl/StringRedBlackTree.java orc/src/java/org/apache/orc/impl/StringRedBlackTree.java index c353ab0..ab50f6d 100644 --- orc/src/java/org/apache/orc/impl/StringRedBlackTree.java +++ orc/src/java/org/apache/orc/impl/StringRedBlackTree.java @@ -191,6 +191,19 @@ public void getText(Text result, int originalPosition) { byteArray.setText(result, offset, length); } + public int write(OutputStream out, + int originalPosition) throws IOException { + int offset = keyOffsets.get(originalPosition); + int length; + if (originalPosition + 1 == keyOffsets.size()) { + length = byteArray.size() - offset; + } else { + length = keyOffsets.get(originalPosition + 1) - offset; + } + byteArray.write(out, offset, length); + return length; + } + /** * Get the size of the character data in the table. 
* @return the bytes used by the table diff --git orc/src/java/org/apache/orc/impl/WriterImpl.java orc/src/java/org/apache/orc/impl/WriterImpl.java index b2966e0..2c9cc44 100644 --- orc/src/java/org/apache/orc/impl/WriterImpl.java +++ orc/src/java/org/apache/orc/impl/WriterImpl.java @@ -63,7 +63,6 @@ import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.io.Text; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; @@ -97,9 +96,6 @@ private static final int HDFS_BUFFER_SIZE = 256 * 1024; private static final int MIN_ROW_INDEX_STRIDE = 1000; - // threshold above which buffer size will be automatically resized - private static final int COLUMN_COUNT_THRESHOLD = 1000; - private final FileSystem fs; private final Path path; private final long defaultStripeSize; @@ -383,6 +379,138 @@ public String toString() { } } + private static int nextRowIndexEntryId = 0; + + /** + * This class builds the RowIndexEntry for the row indexes. It represents + * the information about a row group (10,000 rows) with both the statistics + * and the position information. 
+ */ + static class RowIndexEntry implements PositionRecorder { + private int nextStream = 0; + private int streamsLeft = 0; + private boolean isActive = false; + private final int columnId; + private final int entryId; + private final List doneList; + private final List positions = new ArrayList<>(); + private final List lengths = new ArrayList<>(); + private OrcProto.ColumnStatistics statistics; + private int firstPosition = 0; + private int firstLength = 0; + + RowIndexEntry(List doneList, + int columnId) { + this.doneList = doneList; + entryId = nextRowIndexEntryId++; + this.columnId = columnId; + } + + void finishStream(int streamId, int length) { + lengths.set(streamId, length); + if (--streamsLeft == 0 && isActive) { + doneList.add(this); + } + } + + @Override + public void addPosition(long offset) { + positions.add(offset); + } + + /** + * The entry must be active and have no more remaining callbacks, + * for it to be finalized. + */ + public void activate() { + if (!isActive) { + isActive = true; + if (streamsLeft == 0) { + doneList.add(this); + } + } + } + + /** + * Add a callback to this entry. 
+ * @return the new callback + */ + public StreamCallback addCallback(long startPosition) { + int streamId = nextStream++; + streamsLeft += 1; + lengths.add(0); + return new StreamCallback(this, startPosition, streamId); + } + + public int getPositionCount() { + return positions.size(); + } + + public long getPosition(int idx) { + return positions.get(idx); + } + + public void removeStreams(int numPositions, int numStreams) { + firstPosition += numPositions; + firstLength += numStreams; + } + + public void setStatistics(OrcProto.ColumnStatistics statistics) { + this.statistics = statistics; + } + + public OrcProto.RowIndexEntry serialize() { + OrcProto.RowIndexEntry.Builder builder = + OrcProto.RowIndexEntry.newBuilder(); + if (firstLength == 0 && firstPosition == 0) { + builder.addAllPositions(positions); + builder.addAllLengths(lengths); + } else { + builder.addAllPositions(positions.subList(firstPosition, + positions.size())); + builder.addAllLengths(lengths.subList(firstLength, lengths.size())); + } + if (statistics != null) { + builder.setStatistics(statistics); + } + return builder.build(); + } + + public String getName() { + return columnId + "." + entryId; + } + + public String toString() { + return "row index entry " + columnId + "." + entryId + + (isActive ? " active" : " inactive") + + " streams left = " + streamsLeft; + } + } + + /** + * This class is the proxy that gets called when the next RLE and + * compression blocks are closed. 
+ */ + static class StreamCallback + implements PositionedOutputStream.CompressionCallback { + final RowIndexEntry entry; + final int streamId; + final long startPosition; + + StreamCallback(RowIndexEntry entry, + long startPosition, + int streamId) { + this.entry = entry; + this.streamId = streamId; + this.startPosition = startPosition; + } + + @Override + public void compressionDone(long position) { + entry.finishStream(streamId, (int) (position - startPosition)); + } + } + /** * An output receiver that writes the ByteBuffers to the output stream * as they are received. @@ -401,19 +529,6 @@ public void output(ByteBuffer buffer) throws IOException { } } - private static class RowIndexPositionRecorder implements PositionRecorder { - private final OrcProto.RowIndexEntry.Builder builder; - - RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) { - this.builder = builder; - } - - @Override - public void addPosition(long position) { - builder.addPositions(position); - } - } - /** * Interface from the Writer to the TreeWriters. This limits the visibility * that the TreeWriters have into the Writer. 
@@ -564,14 +679,14 @@ public boolean hasWriterTimeZone() { private abstract static class TreeWriter { protected final int id; protected final BitFieldWriter isPresent; + private long isPresentStart; private final boolean isCompressed; protected final ColumnStatisticsImpl indexStatistics; protected final ColumnStatisticsImpl stripeColStatistics; - private final ColumnStatisticsImpl fileStatistics; + protected final ColumnStatisticsImpl fileStatistics; protected TreeWriter[] childrenWriters; - protected final RowIndexPositionRecorder rowIndexPosition; - private final OrcProto.RowIndex.Builder rowIndex; - private final OrcProto.RowIndexEntry.Builder rowIndexEntry; + protected final List rowIndex = new ArrayList<>(100); + protected RowIndexEntry rowIndexEntry; private final PositionedOutputStream rowIndexStream; private final PositionedOutputStream bloomFilterStream; protected final BloomFilterIO bloomFilter; @@ -611,9 +726,7 @@ public boolean hasWriterTimeZone() { stripeColStatistics = ColumnStatisticsImpl.create(schema); fileStatistics = ColumnStatisticsImpl.create(schema); childrenWriters = new TreeWriter[0]; - rowIndex = OrcProto.RowIndex.newBuilder(); - rowIndexEntry = OrcProto.RowIndexEntry.newBuilder(); - rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry); + rowIndexEntry = new RowIndexEntry(rowIndex, columnId); stripeStatsBuilders = Lists.newArrayList(); if (streamFactory.buildIndex()) { rowIndexStream = streamFactory.createStream(id, OrcProto.Stream.Kind.ROW_INDEX); @@ -634,18 +747,10 @@ public boolean hasWriterTimeZone() { } } - protected OrcProto.RowIndex.Builder getRowIndex() { - return rowIndex; - } - protected ColumnStatisticsImpl getStripeStatistics() { return stripeColStatistics; } - protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() { - return rowIndexEntry; - } - IntegerWriter createIntegerWriter(PositionedOutputStream output, boolean signed, boolean isDirectV2, StreamFactory writer) { @@ -732,13 +837,20 @@ void 
writeBatch(ColumnVector vector, int offset, } private void removeIsPresentPositions() { - for(int i=0; i < rowIndex.getEntryCount(); ++i) { - OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i); - List positions = entry.getPositionsList(); - // bit streams use 3 positions if uncompressed, 4 if compressed - positions = positions.subList(isCompressed ? 4 : 3, positions.size()); - entry.clearPositions(); - entry.addAllPositions(positions); + int positionsToRemove = isCompressed ? 4 : 3; + for(RowIndexEntry entry: rowIndex) { + entry.removeStreams(positionsToRemove, 1); + } + } + + /** + * Flush all of the streams in preparation for writing the stripe. + * This guarantees that all of the compression callbacks are done before + * we start building the indexes. + */ + void flush() throws IOException { + if (isPresent != null) { + isPresent.flush(); } } @@ -754,17 +866,13 @@ private void removeIsPresentPositions() { */ void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { - if (isPresent != null) { - isPresent.flush(); - - // if no nulls are found in a stream, then suppress the stream - if(!foundNulls) { - isPresentOutStream.suppress(); - // since isPresent bitstream is suppressed, update the index to - // remove the positions of the isPresent stream - if (rowIndexStream != null) { - removeIsPresentPositions(); - } + // if no nulls are found in a stream, then suppress the stream + if(isPresent != null && !foundNulls) { + isPresentOutStream.suppress(); + // since isPresent bitstream is suppressed, update the index to + // remove the positions of the isPresent stream + if (rowIndexStream != null) { + removeIsPresentPositions(); } } @@ -782,16 +890,21 @@ void writeStripe(OrcProto.StripeFooter.Builder builder, builder.setWriterTimezone(TimeZone.getDefault().getID()); } if (rowIndexStream != null) { - if (rowIndex.getEntryCount() != requiredIndexEntries) { - throw new IllegalArgumentException("Column has wrong 
number of " + - "index entries found: " + rowIndex.getEntryCount() + " expected: " + - requiredIndexEntries); + if (rowIndex.size() != requiredIndexEntries) { + throw new IllegalArgumentException("Column " + id + + " has wrong number of index entries found: " + + rowIndex.size() + " expected: " + + requiredIndexEntries); } - rowIndex.build().writeTo(rowIndexStream); + OrcProto.RowIndex.Builder idxBuilder = OrcProto.RowIndex.newBuilder(); + for(RowIndexEntry entry: rowIndex) { + idxBuilder.addEntry(entry.serialize()); + } + idxBuilder.build().writeTo(rowIndexStream); rowIndexStream.flush(); } rowIndex.clear(); - rowIndexEntry.clear(); + rowIndexEntry = new RowIndexEntry(rowIndex, id); // write the bloom filter to out stream if (bloomFilterStream != null) { @@ -832,17 +945,22 @@ private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder, * next index and ensures all of the children columns also create an entry. * @throws IOException */ - void createRowIndexEntry() throws IOException { + RowIndexEntry createRowIndexEntry() throws IOException { stripeColStatistics.merge(indexStatistics); - rowIndexEntry.setStatistics(indexStatistics.serialize()); + rowIndexEntry.setStatistics(indexStatistics.serialize().build()); + if (isPresent != null) { + isPresent.registerCallback(rowIndexEntry.addCallback(isPresentStart)); + } + RowIndexEntry oldEntry = rowIndexEntry; + + // Reset for the next index point indexStatistics.reset(); - rowIndex.addEntry(rowIndexEntry); - rowIndexEntry.clear(); + rowIndexEntry = new RowIndexEntry(rowIndex, id); addBloomFilterEntry(); - recordPosition(rowIndexPosition); for(TreeWriter child: childrenWriters) { child.createRowIndexEntry(); } + return oldEntry; } void addBloomFilterEntry() { @@ -857,12 +975,13 @@ void addBloomFilterEntry() { /** * Record the current position in each of this column's streams. 
- * @param recorder where should the locations be recorded + * @param entry where should the locations be recorded * @throws IOException */ - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry entry) throws IOException { if (isPresent != null) { - isPresent.getPosition(recorder); + isPresent.getPosition(entry); + isPresentStart = entry.getPosition(0); } } @@ -881,6 +1000,7 @@ long estimateMemory() { private static class BooleanTreeWriter extends TreeWriter { private final BitFieldWriter writer; + private long writerStart; BooleanTreeWriter(int columnId, TypeDescription schema, @@ -890,7 +1010,7 @@ long estimateMemory() { PositionedOutputStream out = writer.createStream(id, OrcProto.Stream.Kind.DATA); this.writer = new BitFieldWriter(out, 1); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -918,22 +1038,39 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + writer.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); writer.getPosition(recorder); + writerStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + writer.registerCallback(result.addCallback(writerStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } private static class ByteTreeWriter extends TreeWriter { private final RunLengthByteWriter writer; + private long writerStart; 
ByteTreeWriter(int columnId, TypeDescription schema, @@ -942,7 +1079,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { super(columnId, schema, writer, nullable); this.writer = new RunLengthByteWriter(writer.createStream(id, OrcProto.Stream.Kind.DATA)); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -976,22 +1113,39 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + writer.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); writer.getPosition(recorder); + writerStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + writer.registerCallback(result.addCallback(writerStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } private static class IntegerTreeWriter extends TreeWriter { private final IntegerWriter writer; + private long writerStart; private boolean isDirectV2 = true; IntegerTreeWriter(int columnId, @@ -1003,7 +1157,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { OrcProto.Stream.Kind.DATA); this.isDirectV2 = isNewWriteFormat(writer); this.writer = createIntegerWriter(out, true, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -1047,22 +1201,39 @@ void writeBatch(ColumnVector vector, int offset, } @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry 
result = super.createRowIndexEntry(); + writer.registerCallback(result.addCallback(writerStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; + } + + @Override + void flush() throws IOException { + super.flush(); + writer.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); writer.getPosition(recorder); + writerStart = recorder.getPosition(offset); } } private static class FloatTreeWriter extends TreeWriter { private final PositionedOutputStream stream; + private long streamStart; private final SerializationUtils utils; FloatTreeWriter(int columnId, @@ -1073,7 +1244,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { this.stream = writer.createStream(id, OrcProto.Stream.Kind.DATA); this.utils = new SerializationUtils(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -1106,24 +1277,40 @@ void writeBatch(ColumnVector vector, int offset, } } + @Override + void flush() throws IOException { + super.flush(); + stream.flush(); + } @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - stream.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); stream.getPosition(recorder); + streamStart = 
recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + stream.registerCallback(result.addCallback(streamStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } private static class DoubleTreeWriter extends TreeWriter { private final PositionedOutputStream stream; + private long streamStart; private final SerializationUtils utils; DoubleTreeWriter(int columnId, @@ -1134,7 +1321,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { this.stream = writer.createStream(id, OrcProto.Stream.Kind.DATA); this.utils = new SerializationUtils(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -1168,41 +1355,64 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + stream.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - stream.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + stream.registerCallback(result.addCallback(streamStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; + } + + @Override + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); stream.getPosition(recorder); + streamStart = recorder.getPosition(offset); } } private static abstract class StringBaseTreeWriter extends TreeWriter { private static final int INITIAL_DICTIONARY_SIZE = 4096; private final OutStream stringOutput; - private final IntegerWriter lengthOutput; - private final IntegerWriter 
rowOutput; + protected final IntegerWriter lengthOutput; + private long lengthOutputStart; + protected final IntegerWriter rowOutput; protected final StringRedBlackTree dictionary = new StringRedBlackTree(INITIAL_DICTIONARY_SIZE); protected final DynamicIntArray rows = new DynamicIntArray(); - protected final PositionedOutputStream directStreamOutput; - protected final IntegerWriter directLengthOutput; - private final List savedRowIndex = - new ArrayList(); - private final boolean buildIndex; + protected final OutStream dataStream; + private long dataStreamStart; + + // the list of row indexes that are saved until we are writing the stripe + // because we want to sort the dictionary + private final List savedRowIndex = new ArrayList<>(); + + // for each item in savedRowIndex, this is the row number the entry follows private final List rowIndexValueCount = new ArrayList(); + + // are we building an index? + private final boolean buildIndex; + // If the number of keys in a dictionary is greater than this fraction of //the total number of non-null rows, turn off dictionary encoding private final double dictionaryKeySizeThreshold; protected boolean useDictionaryEncoding = true; private boolean isDirectV2 = true; - private boolean doneDictionaryCheck; - private final boolean strideDictionaryCheck; + private boolean needStrideDictionaryCheck; StringBaseTreeWriter(int columnId, TypeDescription schema, @@ -1212,132 +1422,180 @@ void recordPosition(PositionRecorder recorder) throws IOException { this.isDirectV2 = isNewWriteFormat(writer); stringOutput = writer.createStream(id, OrcProto.Stream.Kind.DICTIONARY_DATA); - lengthOutput = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - rowOutput = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.DATA), false, isDirectV2, writer); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(0L); + dataStream = writer.createStream(id, 
OrcProto.Stream.Kind.DATA); + OutStream lengthStream = writer.createStream(id, + OrcProto.Stream.Kind.LENGTH); + lengthOutput = createIntegerWriter(lengthStream, false, isDirectV2, + writer); + rowOutput = createIntegerWriter(dataStream, false, isDirectV2, writer); buildIndex = writer.buildIndex(); - directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA); - directLengthOutput = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); + Configuration conf = writer.getConfiguration(); dictionaryKeySizeThreshold = OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf); - strideDictionaryCheck = - OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf); - doneDictionaryCheck = false; + needStrideDictionaryCheck = + isDirectV2 && + OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf); + recordPosition(rowIndexEntry); } - private boolean checkDictionaryEncoding() { - if (!doneDictionaryCheck) { + private boolean checkDictionaryEncoding(boolean endOfStripe + ) throws IOException { + if ((endOfStripe || needStrideDictionaryCheck) && rows.size() > 0) { // Set the flag indicating whether or not to use dictionary encoding // based on whether or not the fraction of distinct keys over number of // non-null rows is less than the configured threshold - float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f; - useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold; - doneDictionaryCheck = true; + float ratio = (float) (dictionary.size()) / rows.size(); + useDictionaryEncoding = ratio <= dictionaryKeySizeThreshold; + needStrideDictionaryCheck = false; + if (!useDictionaryEncoding) { + switchToDirect(); + } } return useDictionaryEncoding; } @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - // if rows in stripe is less than dictionaryCheckAfterRows, dictionary - // checking would not have happened. 
So do it again here. - checkDictionaryEncoding(); - + void flush() throws IOException { + checkDictionaryEncoding(isDirectV2); + super.flush(); + lengthOutput.flush(); if (useDictionaryEncoding) { - flushDictionary(); + writeDictionaryStripe(); + stringOutput.flush(); + rowOutput.flush(); } else { - // flushout any left over entries from dictionary - if (rows.size() > 0) { - flushDictionary(); - } - + dataStream.flush(); // suppress the stream for every stripe if dictionary is disabled stringOutput.suppress(); } + } - // we need to build the rowindex before calling super, since it - // writes it out. + @Override + void writeStripe(OrcProto.StripeFooter.Builder builder, + int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - stringOutput.flush(); - lengthOutput.flush(); - rowOutput.flush(); - directStreamOutput.flush(); - directLengthOutput.flush(); - // reset all of the fields to be ready for the next stripe. dictionary.clear(); - savedRowIndex.clear(); - rowIndexValueCount.clear(); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(0L); + if (buildIndex) { + if (useDictionaryEncoding) { + // reset all of the fields to be ready for the next stripe. + savedRowIndex.clear(); + rowIndexValueCount.clear(); + } + // record the start positions of first index stride of next stripe + recordPosition(rowIndexEntry); + } + } - if (!useDictionaryEncoding) { - // record the start positions of first index stride of next stripe i.e - // beginning of the direct streams when dictionary is disabled - recordDirectStreamPosition(); + /** + * After trying the dictionary, fall back to a direct encoding. This + * means that we need to write the values that are currently stored + * and clear them out. 
+ * @throws IOException + */ + private void switchToDirect() throws IOException { + int length = rows.size(); + int rowIndexEntryPosn = 0; + RowIndexEntry previous = null; + if (buildIndex) { + // get the first entry so that we can update its position + previous = savedRowIndex.isEmpty() ? rowIndexEntry : + savedRowIndex.get(0); + int offset = previous.getPositionCount(); + dataStream.getPosition(previous); + dataStreamStart = previous.getPosition(offset); + offset = previous.getPositionCount(); + lengthOutput.getPosition(previous); + lengthOutputStart = previous.getPosition(offset); + } + // write the values translated into the dump order. + for(int row = 0; row <= length; ++row) { + // now that we are writing out the row values, we can finalize the + // row index + if (buildIndex) { + while (rowIndexEntryPosn < savedRowIndex.size() && + row == rowIndexValueCount.get(rowIndexEntryPosn)) { + dataStream.registerCallback(previous.addCallback(dataStreamStart)); + lengthOutput.registerCallback(previous.addCallback( + lengthOutputStart)); + previous.activate(); + if (++rowIndexEntryPosn < savedRowIndex.size()) { + previous = savedRowIndex.get(rowIndexEntryPosn); + } else { + previous = rowIndexEntry; + } + int offset = previous.getPositionCount(); + dataStream.getPosition(previous); + dataStreamStart = previous.getPosition(offset); + offset = previous.getPositionCount(); + lengthOutput.getPosition(previous); + lengthOutputStart = previous.getPosition(offset); + } + } + if (row < length) { + int len = dictionary.write(dataStream, rows.get(row)); + lengthOutput.write(len); + } } + dictionary.clear(); + rows.clear(); + savedRowIndex.clear(); + rowIndexValueCount.clear(); } - private void flushDictionary() throws IOException { + private void writeDictionaryStripe() throws IOException { final int[] dumpOrder = new int[dictionary.size()]; - if (useDictionaryEncoding) { - // Write the dictionary by traversing the red-black tree writing out - // the bytes and lengths; and 
creating the map from the original order - // to the final sorted order. - - dictionary.visit(new StringRedBlackTree.Visitor() { - private int currentId = 0; - @Override - public void visit(StringRedBlackTree.VisitorContext context - ) throws IOException { - context.writeBytes(stringOutput); - lengthOutput.write(context.getLength()); - dumpOrder[context.getOriginalPosition()] = currentId++; - } - }); - } else { - // for direct encoding, we don't want the dictionary data stream - stringOutput.suppress(); - } + // Write the dictionary by traversing the red-black tree writing out + // the bytes and lengths; and creating the map from the original order + // to the final sorted order. + + dictionary.visit(new StringRedBlackTree.Visitor() { + private int currentId = 0; + @Override + public void visit(StringRedBlackTree.VisitorContext context + ) throws IOException { + context.writeBytes(stringOutput); + lengthOutput.write(context.getLength()); + dumpOrder[context.getOriginalPosition()] = currentId++; + } + }); + lengthOutput.flush(); + stringOutput.flush(); int length = rows.size(); int rowIndexEntry = 0; - OrcProto.RowIndex.Builder rowIndex = getRowIndex(); - Text text = new Text(); + RowIndexEntry previous = null; + if (buildIndex) { + previous = savedRowIndex.get(rowIndexEntry); + int offset = previous.getPositionCount(); + // fill in the missing positions for the data stream + rowOutput.getPosition(previous); + dataStreamStart = previous.getPosition(offset); + } // write the values translated into the dump order. 
- for(int i = 0; i <= length; ++i) { + for(int row = 0; row <= length; ++row) { // now that we are writing out the row values, we can finalize the // row index if (buildIndex) { - while (i == rowIndexValueCount.get(rowIndexEntry) && - rowIndexEntry < savedRowIndex.size()) { - OrcProto.RowIndexEntry.Builder base = - savedRowIndex.get(rowIndexEntry++).toBuilder(); - if (useDictionaryEncoding) { - rowOutput.getPosition(new RowIndexPositionRecorder(base)); - } else { - PositionRecorder posn = new RowIndexPositionRecorder(base); - directStreamOutput.getPosition(posn); - directLengthOutput.getPosition(posn); + while (rowIndexEntry < savedRowIndex.size() && + row == rowIndexValueCount.get(rowIndexEntry)) { + rowOutput.registerCallback(previous.addCallback( + dataStreamStart)); + previous.activate(); + // set the position on the next one + if (++rowIndexEntry < savedRowIndex.size()) { + previous = savedRowIndex.get(rowIndexEntry); + int offset = previous.getPositionCount(); + rowOutput.getPosition(previous); + dataStreamStart = previous.getPosition(offset); } - rowIndex.addEntry(base.build()); } } - if (i != length) { - if (useDictionaryEncoding) { - rowOutput.write(dumpOrder[rows.get(i)]); - } else { - dictionary.getText(text, rows.get(i)); - directStreamOutput.write(text.getBytes(), 0, text.getLength()); - directLengthOutput.write(text.getLength()); - } + if (row < length) { + rowOutput.write(dumpOrder[rows.get(row)]); } } + rowOutput.flush(); rows.clear(); } @@ -1364,43 +1622,40 @@ public void visit(StringRedBlackTree.VisitorContext context } /** - * This method doesn't call the super method, because unlike most of the - * other TreeWriters, this one can't record the position in the streams - * until the stripe is being flushed. Therefore it saves all of the entries - * and augments them with the final information as the stripe is written. + * When we are using dictionary encoding, this method is significantly + * different than the others. 
Because the data stream isn't written until + * the stripe is being flushed, we can't know the positions in the stream. * @throws IOException */ @Override - void createRowIndexEntry() throws IOException { - getStripeStatistics().merge(indexStatistics); - OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry(); - rowIndexEntry.setStatistics(indexStatistics.serialize()); - indexStatistics.reset(); - OrcProto.RowIndexEntry base = rowIndexEntry.build(); - savedRowIndex.add(base); - rowIndexEntry.clear(); - addBloomFilterEntry(); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(Long.valueOf(rows.size())); - if (strideDictionaryCheck) { - checkDictionaryEncoding(); - } - if (!useDictionaryEncoding) { - if (rows.size() > 0) { - flushDictionary(); - // just record the start positions of next index stride - recordDirectStreamPosition(); - } else { - // record the start positions of next index stride - recordDirectStreamPosition(); - getRowIndex().addEntry(base); - } + RowIndexEntry createRowIndexEntry() throws IOException { + checkDictionaryEncoding(false); + RowIndexEntry result = super.createRowIndexEntry(); + if (useDictionaryEncoding) { + rowIndexValueCount.add((long) rows.size()); + savedRowIndex.add(result); + } else { + dataStream.registerCallback(result.addCallback(dataStreamStart)); + lengthOutput.registerCallback(result.addCallback(lengthOutputStart)); + result.activate(); } + recordPosition(rowIndexEntry); + return result; } - private void recordDirectStreamPosition() throws IOException { - directStreamOutput.getPosition(rowIndexPosition); - directLengthOutput.getPosition(rowIndexPosition); + @Override + public void recordPosition(RowIndexEntry recorder) throws IOException { + super.recordPosition(recorder); + if (useDictionaryEncoding) { + // we'll fill this in later + } else { + int offset = recorder.getPositionCount(); + dataStream.getPosition(recorder); + dataStreamStart = recorder.getPosition(offset); + offset = 
recorder.getPositionCount(); + lengthOutput.getPosition(recorder); + lengthOutputStart = recorder.getPosition(offset); + } } @Override @@ -1422,42 +1677,59 @@ void writeBatch(ColumnVector vector, int offset, int length) throws IOException { super.writeBatch(vector, offset, length); BytesColumnVector vec = (BytesColumnVector) vector; - if (vector.isRepeating) { - if (vector.noNulls || !vector.isNull[0]) { - if (useDictionaryEncoding) { + if (useDictionaryEncoding) { + if (vector.isRepeating) { + if (vector.noNulls || !vector.isNull[0]) { int id = dictionary.add(vec.vector[0], vec.start[0], vec.length[0]); - for(int i=0; i < length; ++i) { + for (int i = 0; i < length; ++i) { rows.add(id); } - } else { - for(int i=0; i < length; ++i) { - directStreamOutput.write(vec.vector[0], vec.start[0], - vec.length[0]); - directLengthOutput.write(vec.length[0]); + indexStatistics.updateString(vec.vector[0], vec.start[0], + vec.length[0], length); + if (createBloomFilter) { + bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]); } } - indexStatistics.updateString(vec.vector[0], vec.start[0], - vec.length[0], length); - if (createBloomFilter) { - bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]); + } else { + for (int i = 0; i < length; ++i) { + if (vec.noNulls || !vec.isNull[i + offset]) { + rows.add(dictionary.add(vec.vector[offset + i], + vec.start[offset + i], vec.length[offset + i])); + indexStatistics.updateString(vec.vector[offset + i], + vec.start[offset + i], vec.length[offset + i], 1); + if (createBloomFilter) { + bloomFilter.addBytes(vec.vector[offset + i], + vec.start[offset + i], vec.length[offset + i]); + } + } } } } else { - for(int i=0; i < length; ++i) { - if (vec.noNulls || !vec.isNull[i + offset]) { - if (useDictionaryEncoding) { - rows.add(dictionary.add(vec.vector[offset + i], - vec.start[offset + i], vec.length[offset + i])); - } else { - directStreamOutput.write(vec.vector[offset + i], - vec.start[offset + i], vec.length[offset + 
i]); - directLengthOutput.write(vec.length[offset + i]); + if (vector.isRepeating) { + if (vector.noNulls || !vector.isNull[0]) { + for (int i = 0; i < length; ++i) { + dataStream.write(vec.vector[0], vec.start[0], + vec.length[0]); + lengthOutput.write(vec.length[0]); } - indexStatistics.updateString(vec.vector[offset + i], - vec.start[offset + i], vec.length[offset + i], 1); + indexStatistics.updateString(vec.vector[0], vec.start[0], + vec.length[0], length); if (createBloomFilter) { - bloomFilter.addBytes(vec.vector[offset + i], + bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]); + } + } + } else { + for (int i = 0; i < length; ++i) { + if (vec.noNulls || !vec.isNull[i + offset]) { + dataStream.write(vec.vector[offset + i], vec.start[offset + i], vec.length[offset + i]); + lengthOutput.write(vec.length[offset + i]); + indexStatistics.updateString(vec.vector[offset + i], + vec.start[offset + i], vec.length[offset + i], 1); + if (createBloomFilter) { + bloomFilter.addBytes(vec.vector[offset + i], + vec.start[offset + i], vec.length[offset + i]); + } } } } @@ -1507,8 +1779,8 @@ void writeBatch(ColumnVector vector, int offset, } } else { for(int i=0; i < length; ++i) { - directStreamOutput.write(ptr, ptrOffset, itemLength); - directLengthOutput.write(itemLength); + dataStream.write(ptr, ptrOffset, itemLength); + lengthOutput.write(itemLength); } } indexStatistics.updateString(ptr, ptrOffset, itemLength, length); @@ -1535,8 +1807,8 @@ void writeBatch(ColumnVector vector, int offset, if (useDictionaryEncoding) { rows.add(dictionary.add(ptr, ptrOffset, itemLength)); } else { - directStreamOutput.write(ptr, ptrOffset, itemLength); - directLengthOutput.write(itemLength); + dataStream.write(ptr, ptrOffset, itemLength); + lengthOutput.write(itemLength); } indexStatistics.updateString(ptr, ptrOffset, itemLength, 1); if (createBloomFilter) { @@ -1569,7 +1841,7 @@ void writeBatch(ColumnVector vector, int offset, BytesColumnVector vec = (BytesColumnVector) 
vector; if (vector.isRepeating) { if (vector.noNulls || !vector.isNull[0]) { - int itemLength = Math.min(vec.length[0], maxLength); + int itemLength = Math.min(maxLength, vec.length[0]); if (useDictionaryEncoding) { int id = dictionary.add(vec.vector[0], vec.start[0], itemLength); for(int i=0; i < length; ++i) { @@ -1577,13 +1849,12 @@ void writeBatch(ColumnVector vector, int offset, } } else { for(int i=0; i < length; ++i) { - directStreamOutput.write(vec.vector[0], vec.start[0], - itemLength); - directLengthOutput.write(itemLength); + dataStream.write(vec.vector[0], vec.start[0], itemLength); + lengthOutput.write(itemLength); } } - indexStatistics.updateString(vec.vector[0], vec.start[0], - itemLength, length); + indexStatistics.updateString(vec.vector[0], vec.start[0], itemLength, + length); if (createBloomFilter) { bloomFilter.addBytes(vec.vector[0], vec.start[0], itemLength); } @@ -1591,20 +1862,20 @@ void writeBatch(ColumnVector vector, int offset, } else { for(int i=0; i < length; ++i) { if (vec.noNulls || !vec.isNull[i + offset]) { - int itemLength = Math.min(vec.length[offset + i], maxLength); + int itemLength = Math.min(maxLength, vec.length[i + offset]); if (useDictionaryEncoding) { - rows.add(dictionary.add(vec.vector[offset + i], - vec.start[offset + i], itemLength)); + rows.add(dictionary.add(vec.vector[i + offset], + vec.start[i + offset], itemLength)); } else { - directStreamOutput.write(vec.vector[offset + i], - vec.start[offset + i], itemLength); - directLengthOutput.write(itemLength); + dataStream.write(vec.vector[i + offset], vec.start[i + offset], + itemLength); + lengthOutput.write(itemLength); } - indexStatistics.updateString(vec.vector[offset + i], - vec.start[offset + i], itemLength, 1); + indexStatistics.updateString(vec.vector[i + offset], + vec.start[i + offset], itemLength, 1); if (createBloomFilter) { - bloomFilter.addBytes(vec.vector[offset + i], - vec.start[offset + i], itemLength); + bloomFilter.addBytes(vec.vector[i + offset], + 
vec.start[i + offset], itemLength); } } } @@ -1614,7 +1885,9 @@ void writeBatch(ColumnVector vector, int offset, private static class BinaryTreeWriter extends TreeWriter { private final PositionedOutputStream stream; + private long streamStart; private final IntegerWriter length; + private long lengthStart; private boolean isDirectV2 = true; BinaryTreeWriter(int columnId, @@ -1627,7 +1900,7 @@ void writeBatch(ColumnVector vector, int offset, this.isDirectV2 = isNewWriteFormat(writer); this.length = createIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -1675,21 +1948,39 @@ void writeBatch(ColumnVector vector, int offset, } } + @Override + void flush() throws IOException { + super.flush(); + stream.flush(); + length.flush(); + } @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - stream.flush(); - length.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); stream.getPosition(recorder); + streamStart = recorder.getPosition(offset); + offset = recorder.getPositionCount(); length.getPosition(recorder); + lengthStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + stream.registerCallback(result.addCallback(streamStart)); + length.registerCallback(result.addCallback(lengthStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } @@ -1701,7 +1992,9 @@ void recordPosition(PositionRecorder recorder) throws IOException { private static class 
TimestampTreeWriter extends TreeWriter { private final IntegerWriter seconds; + private long secondsStart; private final IntegerWriter nanos; + private long nanosStart; private final boolean isDirectV2; private final long base_timestamp; @@ -1715,7 +2008,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { OrcProto.Stream.Kind.DATA), true, isDirectV2, writer); this.nanos = createIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); // for unit tests to set different time zones this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND; writer.useWriterTimeZone(true); @@ -1770,12 +2063,17 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + seconds.flush(); + nanos.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - seconds.flush(); - nanos.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } private static long formatNanos(int nanos) { @@ -1795,15 +2093,30 @@ private static long formatNanos(int nanos) { } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); seconds.getPosition(recorder); + secondsStart = recorder.getPosition(offset); + offset = recorder.getPositionCount(); nanos.getPosition(recorder); + nanosStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + seconds.registerCallback(result.addCallback(secondsStart)); + nanos.registerCallback(result.addCallback(nanosStart)); + result.activate(); + 
recordPosition(rowIndexEntry); + return result; } } private static class DateTreeWriter extends TreeWriter { private final IntegerWriter writer; + private long writerStart; private final boolean isDirectV2; DateTreeWriter(int columnId, @@ -1815,7 +2128,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { OrcProto.Stream.Kind.DATA); this.isDirectV2 = isNewWriteFormat(writer); this.writer = createIntegerWriter(out, true, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -1849,17 +2162,24 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + writer.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); writer.getPosition(recorder); + writerStart = recorder.getPosition(offset); } @Override @@ -1871,11 +2191,22 @@ void recordPosition(PositionRecorder recorder) throws IOException { return OrcProto.ColumnEncoding.newBuilder() .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + writer.registerCallback(result.addCallback(writerStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; + } } private static class DecimalTreeWriter extends TreeWriter { private final PositionedOutputStream valueStream; + private long valueStreamStart; private final IntegerWriter scaleStream; + private long scaleStreamStart; private final boolean isDirectV2; 
DecimalTreeWriter(int columnId, @@ -1887,7 +2218,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA); this.scaleStream = createIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.SECONDARY), true, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -1935,19 +2266,38 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + valueStream.flush(); + scaleStream.flush(); + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - valueStream.flush(); - scaleStream.flush(); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); valueStream.getPosition(recorder); + valueStreamStart = recorder.getPosition(offset); + offset = recorder.getPositionCount(); scaleStream.getPosition(recorder); + scaleStreamStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + valueStream.registerCallback(result.addCallback(valueStreamStart)); + scaleStream.registerCallback(result.addCallback(scaleStreamStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } @@ -1964,7 +2314,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { children.get(i), writer, true); } - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -2022,18 +2372,35 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + 
for(TreeWriter child: childrenWriters) { + child.flush(); + } + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); for(TreeWriter child: childrenWriters) { child.writeStripe(builder, requiredIndexEntries); } - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } private static class ListTreeWriter extends TreeWriter { private final IntegerWriter lengths; + private long lengthsStart; private final boolean isDirectV2; ListTreeWriter(int columnId, @@ -2047,7 +2414,7 @@ void writeStripe(OrcProto.StripeFooter.Builder builder, createTreeWriter(schema.getChildren().get(0), writer, true); lengths = createIntegerWriter(writer.createStream(columnId, OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -2107,25 +2474,45 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + lengths.flush(); + for(TreeWriter child: childrenWriters) { + child.flush(); + } + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - lengths.flush(); for(TreeWriter child: childrenWriters) { child.writeStripe(builder, requiredIndexEntries); } - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); lengths.getPosition(recorder); + lengthsStart = 
recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + lengths.registerCallback(result.addCallback(lengthsStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } private static class MapTreeWriter extends TreeWriter { private final IntegerWriter lengths; + private long lengthsStart; private final boolean isDirectV2; MapTreeWriter(int columnId, @@ -2142,7 +2529,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { createTreeWriter(children.get(1), writer, true); lengths = createIntegerWriter(writer.createStream(columnId, OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -2207,25 +2594,45 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + lengths.flush(); + for(TreeWriter child: childrenWriters) { + child.flush(); + } + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - lengths.flush(); for(TreeWriter child: childrenWriters) { child.writeStripe(builder, requiredIndexEntries); } - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); lengths.getPosition(recorder); + lengthsStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + lengths.registerCallback(result.addCallback(lengthsStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } private static class 
UnionTreeWriter extends TreeWriter { private final RunLengthByteWriter tags; + private long tagsStart; UnionTreeWriter(int columnId, TypeDescription schema, @@ -2241,7 +2648,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { tags = new RunLengthByteWriter(writer.createStream(columnId, OrcProto.Stream.Kind.DATA)); - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override @@ -2297,20 +2704,39 @@ void writeBatch(ColumnVector vector, int offset, } @Override + void flush() throws IOException { + super.flush(); + tags.flush(); + for(TreeWriter child: childrenWriters) { + child.flush(); + } + } + + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { super.writeStripe(builder, requiredIndexEntries); - tags.flush(); for(TreeWriter child: childrenWriters) { child.writeStripe(builder, requiredIndexEntries); } - recordPosition(rowIndexPosition); + recordPosition(rowIndexEntry); } @Override - void recordPosition(PositionRecorder recorder) throws IOException { + void recordPosition(RowIndexEntry recorder) throws IOException { super.recordPosition(recorder); + int offset = recorder.getPositionCount(); tags.getPosition(recorder); + tagsStart = recorder.getPosition(offset); + } + + @Override + RowIndexEntry createRowIndexEntry() throws IOException { + RowIndexEntry result = super.createRowIndexEntry(); + tags.registerCallback(result.addCallback(tagsStart)); + result.activate(); + recordPosition(rowIndexEntry); + return result; } } @@ -2495,6 +2921,7 @@ private void flushStripe() throws IOException { (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride); OrcProto.StripeFooter.Builder builder = OrcProto.StripeFooter.newBuilder(); + treeWriter.flush(); treeWriter.writeStripe(builder, requiredIndexEntries); long indexSize = 0; long dataSize = 0; @@ -2759,8 +3186,7 @@ public void addRowBatch(VectorizedRowBatch batch) throws IOException { // right size indexes. 
int posn = 0; while (posn < batch.size) { - int chunkSize = Math.min(batch.size - posn, - rowIndexStride - rowsInIndex); + int chunkSize = Math.min(batch.size - posn, rowIndexStride - rowsInIndex); treeWriter.writeRootBatch(batch, posn, chunkSize); posn += chunkSize; rowsInIndex += chunkSize; diff --git orc/src/java/org/apache/orc/tools/FileDump.java orc/src/java/org/apache/orc/tools/FileDump.java index e32027f..766259e 100644 --- orc/src/java/org/apache/orc/tools/FileDump.java +++ orc/src/java/org/apache/orc/tools/FileDump.java @@ -697,6 +697,13 @@ private static String getFormattedRowIndices(int col, } buf.append(entry.getPositions(posIx)); } + buf.append(" lengths: "); + for(int len=0; len < entry.getLengthsCount(); ++len) { + if (len != 0) { + buf.append(","); + } + buf.append(entry.getLengths(len)); + } } return buf.toString(); } diff --git orc/src/java/org/apache/orc/tools/JsonFileDump.java orc/src/java/org/apache/orc/tools/JsonFileDump.java index 75153a2..d567c49 100644 --- orc/src/java/org/apache/orc/tools/JsonFileDump.java +++ orc/src/java/org/apache/orc/tools/JsonFileDump.java @@ -398,6 +398,11 @@ private static void writeRowGroupIndexes(JSONWriter writer, int col, writer.value(entry.getPositions(posIx)); } writer.endArray(); + writer.key("lengths").array(); + for (int len = 0; len < entry.getLengthsCount(); ++len) { + writer.value(entry.getLengths(len)); + } + writer.endArray(); writer.endObject(); } writer.endArray(); diff --git orc/src/protobuf/orc_proto.proto orc/src/protobuf/orc_proto.proto index f4935b4..2d18ec3 100644 --- orc/src/protobuf/orc_proto.proto +++ orc/src/protobuf/orc_proto.proto @@ -82,6 +82,7 @@ message ColumnStatistics { message RowIndexEntry { repeated uint64 positions = 1 [packed=true]; optional ColumnStatistics statistics = 2; + repeated uint32 lengths = 3 [packed=true]; } message RowIndex { diff --git orc/src/test/org/apache/orc/TestVectorOrcFile.java orc/src/test/org/apache/orc/TestVectorOrcFile.java index 112edb9..b5c2d97 100644 
--- orc/src/test/org/apache/orc/TestVectorOrcFile.java +++ orc/src/test/org/apache/orc/TestVectorOrcFile.java @@ -164,6 +164,8 @@ public void openFileSystem () throws Exception { testFilePath = new Path(workDir, "TestVectorOrcFile." + testCaseName.getMethodName() + ".orc"); fs.delete(testFilePath, false); + // clear the memory manager between tests + OrcFile.getStaticMemoryManager(conf).clearAll(); } @Test @@ -1926,8 +1928,9 @@ public double getAllocationScale() { @Override public void addedRow(int count) throws IOException { rows += count; - if (rows % 100 == 0) { + if (rows > 100) { callback.checkMemory(rate); + rows = 0; } } } diff --git orc/src/test/org/apache/orc/tools/TestJsonFileDump.java orc/src/test/org/apache/orc/tools/TestJsonFileDump.java index a514824..d04b436 100644 --- orc/src/test/org/apache/orc/tools/TestJsonFileDump.java +++ orc/src/test/org/apache/orc/tools/TestJsonFileDump.java @@ -147,4 +147,59 @@ public void testJsonDump() throws Exception { checkOutput(outputFilename, workDir + File.separator + outputFilename); } + + @Test + public void testEntryLengths() throws Exception { + TypeDescription schema = TypeDescription.createStruct() + .addField("s1", TypeDescription.createString()) + .addField("s2", TypeDescription.createVarchar().withMaxLength(300)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .compress(CompressionKind.NONE) + .rowIndexStride(1000)); + VectorizedRowBatch batch = schema.createRowBatch(); + + // write 1k of nulls + batch.size = 1024; + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + batch.cols[1].isRepeating = true; + batch.cols[1].noNulls = false; + batch.cols[1].isNull[0] = true; + writer.addRowBatch(batch); + + // write 100k of non-nulls (both s1 = dictionary, s2 = non-dictionary) + // row group size is 1000 and we don't have compression, so the lengths + // should just match to the rle ends + 
batch.reset(); + batch.size = 1024; + byte[][] dictionaryValues = new byte[256][]; + for(int d=0; d < dictionaryValues.length; ++d) { + dictionaryValues[d] = String.format("%02x", d).getBytes(); + } + for(int r = 0; r < 1024; ++r) { + ((BytesColumnVector) batch.cols[0]).setRef(r, dictionaryValues[r % 256], + 0, dictionaryValues[r %256].length); + byte[] value = String.format("%04d", r).getBytes(); + ((BytesColumnVector) batch.cols[1]).setRef(r, value, 0, value.length); + } + for(int b=0; b < 100; b++) { + writer.addRowBatch(batch); + } + writer.close(); + + PrintStream origOut = System.out; + String outputFilename = "orc-file-entry-lengths.json"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=1,2"}); + System.out.flush(); + System.setOut(origOut); + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } } diff --git orc/src/test/resources/orc-file-dump-bloomfilter.out orc/src/test/resources/orc-file-dump-bloomfilter.out index 18fd2fb..66f9019 100644 --- orc/src/test/resources/orc-file-dump-bloomfilter.out +++ orc/src/test/resources/orc-file-dump-bloomfilter.out @@ -39,27 +39,27 @@ File Statistics: Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 Stripes: - Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 951 + Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 985 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 166 - Stream: column 2 section ROW_INDEX start: 186 length 169 - Stream: column 3 section ROW_INDEX start: 355 length 87 - Stream: column 3 section BLOOM_FILTER start: 442 length 512 - Stream: column 1 section DATA start: 954 length 20035 - Stream: column 2 section DATA start: 20989 length 40050 - Stream: column 3 section DATA start: 61039 
length 3543 - Stream: column 3 section LENGTH start: 64582 length 25 - Stream: column 3 section DICTIONARY_DATA start: 64607 length 133 + Stream: column 1 section ROW_INDEX start: 20 length 178 + Stream: column 2 section ROW_INDEX start: 198 length 184 + Stream: column 3 section ROW_INDEX start: 382 length 94 + Stream: column 3 section BLOOM_FILTER start: 476 length 512 + Stream: column 1 section DATA start: 988 length 20035 + Stream: column 2 section DATA start: 21023 length 40050 + Stream: column 3 section DATA start: 61073 length 3543 + Stream: column 3 section LENGTH start: 64616 length 25 + Stream: column 3 section DICTIONARY_DATA start: 64641 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 lengths: 3543 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 lengths: 3543 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 lengths: 3543 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 lengths: 3543 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 lengths: 3543 Bloom filters for column 3: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 1: numHashFunctions: 4 bitCount: 
6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 @@ -67,27 +67,27 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 64826 data: 63775 rows: 5000 tail: 86 index: 944 - Stream: column 0 section ROW_INDEX start: 64826 length 17 - Stream: column 1 section ROW_INDEX start: 64843 length 164 - Stream: column 2 section ROW_INDEX start: 65007 length 168 - Stream: column 3 section ROW_INDEX start: 65175 length 83 - Stream: column 3 section BLOOM_FILTER start: 65258 length 512 - Stream: column 1 section DATA start: 65770 length 20035 - Stream: column 2 section DATA start: 85805 length 40050 - Stream: column 3 section DATA start: 125855 length 3532 - Stream: column 3 section LENGTH start: 129387 length 25 - Stream: column 3 section DICTIONARY_DATA start: 129412 length 133 + Stripe: offset: 64860 data: 63775 rows: 5000 tail: 86 index: 982 + Stream: column 0 section ROW_INDEX start: 64860 length 17 + Stream: column 1 section ROW_INDEX start: 64877 length 180 + Stream: column 2 section ROW_INDEX start: 65057 length 185 + Stream: column 3 section ROW_INDEX start: 65242 length 88 + Stream: column 3 section BLOOM_FILTER start: 65330 length 512 + Stream: column 1 section DATA start: 65842 length 20035 + Stream: column 2 section DATA start: 85877 length 40050 + Stream: column 3 section DATA start: 125927 length 3532 + Stream: column 3 section LENGTH start: 129459 length 25 + Stream: column 3 section DICTIONARY_DATA start: 129484 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 - 
Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 lengths: 3532 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 lengths: 3532 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 lengths: 3532 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 lengths: 3532 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 lengths: 3532 Bloom filters for column 3: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 @@ -95,27 +95,27 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 129631 data: 63787 rows: 5000 tail: 86 index: 950 - Stream: column 0 section ROW_INDEX start: 129631 length 17 - Stream: column 1 section ROW_INDEX start: 129648 length 163 - Stream: column 2 section ROW_INDEX start: 129811 length 168 - Stream: column 3 section ROW_INDEX start: 129979 length 90 - Stream: column 3 section BLOOM_FILTER start: 130069 length 512 - Stream: column 1 section DATA start: 130581 length 20035 - Stream: column 2 section DATA start: 150616 length 40050 - Stream: 
column 3 section DATA start: 190666 length 3544 - Stream: column 3 section LENGTH start: 194210 length 25 - Stream: column 3 section DICTIONARY_DATA start: 194235 length 133 + Stripe: offset: 129703 data: 63787 rows: 5000 tail: 86 index: 984 + Stream: column 0 section ROW_INDEX start: 129703 length 17 + Stream: column 1 section ROW_INDEX start: 129720 length 177 + Stream: column 2 section ROW_INDEX start: 129897 length 184 + Stream: column 3 section ROW_INDEX start: 130081 length 94 + Stream: column 3 section BLOOM_FILTER start: 130175 length 512 + Stream: column 1 section DATA start: 130687 length 20035 + Stream: column 2 section DATA start: 150722 length 40050 + Stream: column 3 section DATA start: 190772 length 3544 + Stream: column 3 section LENGTH start: 194316 length 25 + Stream: column 3 section DICTIONARY_DATA start: 194341 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 lengths: 3544 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 lengths: 3544 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 lengths: 3544 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 lengths: 3544 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 
0,3005,43 lengths: 3544 Bloom filters for column 3: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 @@ -123,27 +123,27 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 194454 data: 63817 rows: 5000 tail: 86 index: 952 - Stream: column 0 section ROW_INDEX start: 194454 length 17 - Stream: column 1 section ROW_INDEX start: 194471 length 165 - Stream: column 2 section ROW_INDEX start: 194636 length 167 - Stream: column 3 section ROW_INDEX start: 194803 length 91 - Stream: column 3 section BLOOM_FILTER start: 194894 length 512 - Stream: column 1 section DATA start: 195406 length 20035 - Stream: column 2 section DATA start: 215441 length 40050 - Stream: column 3 section DATA start: 255491 length 3574 - Stream: column 3 section LENGTH start: 259065 length 25 - Stream: column 3 section DICTIONARY_DATA start: 259090 length 133 + Stripe: offset: 194560 data: 63817 rows: 5000 tail: 86 index: 988 + Stream: column 0 section ROW_INDEX start: 194560 length 17 + Stream: column 1 section ROW_INDEX start: 194577 length 180 + Stream: column 2 section ROW_INDEX start: 194757 length 182 + Stream: column 3 section ROW_INDEX start: 194939 length 97 + Stream: column 3 section BLOOM_FILTER start: 195036 length 512 + Stream: column 1 section DATA start: 195548 length 20035 + Stream: column 2 section DATA start: 215583 length 40050 + Stream: column 3 section DATA start: 255633 length 3574 + Stream: column 3 section LENGTH start: 259207 length 25 + Stream: column 3 section DICTIONARY_DATA start: 259232 length 133 Encoding column 0: DIRECT Encoding 
column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 lengths: 3574 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 lengths: 3574 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 lengths: 3574 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 lengths: 3574 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 lengths: 3574 Bloom filters for column 3: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 @@ -151,28 +151,28 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 259309 data: 12943 rows: 1000 tail: 78 index: 432 - Stream: column 0 section ROW_INDEX start: 259309 length 12 - Stream: column 1 section ROW_INDEX start: 259321 length 38 - Stream: column 2 section ROW_INDEX start: 259359 length 41 - Stream: column 3 section ROW_INDEX 
start: 259400 length 40 - Stream: column 3 section BLOOM_FILTER start: 259440 length 301 - Stream: column 1 section DATA start: 259741 length 4007 - Stream: column 2 section DATA start: 263748 length 8010 - Stream: column 3 section DATA start: 271758 length 768 - Stream: column 3 section LENGTH start: 272526 length 25 - Stream: column 3 section DICTIONARY_DATA start: 272551 length 133 + Stripe: offset: 259451 data: 12943 rows: 1000 tail: 78 index: 444 + Stream: column 0 section ROW_INDEX start: 259451 length 12 + Stream: column 1 section ROW_INDEX start: 259463 length 42 + Stream: column 2 section ROW_INDEX start: 259505 length 45 + Stream: column 3 section ROW_INDEX start: 259550 length 44 + Stream: column 3 section BLOOM_FILTER start: 259594 length 301 + Stream: column 1 section DATA start: 259895 length 4007 + Stream: column 2 section DATA start: 263902 length 8010 + Stream: column 3 section DATA start: 271912 length 768 + Stream: column 3 section LENGTH start: 272680 length 25 + Stream: column 3 section DICTIONARY_DATA start: 272705 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 lengths: 768 Bloom filters for column 3: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 -File length: 273307 bytes +File length: 273461 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git orc/src/test/resources/orc-file-dump-bloomfilter2.out orc/src/test/resources/orc-file-dump-bloomfilter2.out index 
fa5cc2d..c86d8f8 100644 --- orc/src/test/resources/orc-file-dump-bloomfilter2.out +++ orc/src/test/resources/orc-file-dump-bloomfilter2.out @@ -39,27 +39,27 @@ File Statistics: Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 Stripes: - Stripe: offset: 3 data: 63786 rows: 5000 tail: 85 index: 6974 + Stripe: offset: 3 data: 63786 rows: 5000 tail: 85 index: 7008 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 166 - Stream: column 2 section ROW_INDEX start: 186 length 169 - Stream: column 2 section BLOOM_FILTER start: 355 length 6535 - Stream: column 3 section ROW_INDEX start: 6890 length 87 - Stream: column 1 section DATA start: 6977 length 20035 - Stream: column 2 section DATA start: 27012 length 40050 - Stream: column 3 section DATA start: 67062 length 3543 - Stream: column 3 section LENGTH start: 70605 length 25 - Stream: column 3 section DICTIONARY_DATA start: 70630 length 133 + Stream: column 1 section ROW_INDEX start: 20 length 178 + Stream: column 2 section ROW_INDEX start: 198 length 184 + Stream: column 2 section BLOOM_FILTER start: 382 length 6535 + Stream: column 3 section ROW_INDEX start: 6917 length 94 + Stream: column 1 section DATA start: 7011 length 20035 + Stream: column 2 section DATA start: 27046 length 40050 + Stream: column 3 section DATA start: 67096 length 3543 + Stream: column 3 section LENGTH start: 70639 length 25 + Stream: column 3 section DICTIONARY_DATA start: 70664 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 - 
Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 lengths: 11357 Bloom filters for column 2: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4931 loadFactor: 0.5136 expectedFpp: 0.009432924 Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4956 loadFactor: 0.5163 expectedFpp: 0.009772834 @@ -67,27 +67,27 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 0.829482 - Stripe: offset: 70848 data: 63775 rows: 5000 tail: 85 index: 6965 - Stream: column 0 section ROW_INDEX start: 70848 length 17 - Stream: column 1 section ROW_INDEX start: 70865 length 164 - Stream: column 2 section ROW_INDEX start: 71029 length 168 - Stream: column 2 section BLOOM_FILTER start: 71197 length 6533 - Stream: column 3 section ROW_INDEX start: 77730 length 83 - Stream: column 1 section DATA start: 77813 length 20035 - Stream: column 2 section DATA start: 97848 length 40050 - Stream: column 3 section DATA start: 137898 length 3532 - Stream: column 
3 section LENGTH start: 141430 length 25 - Stream: column 3 section DICTIONARY_DATA start: 141455 length 133 + Stripe: offset: 70882 data: 63775 rows: 5000 tail: 85 index: 7003 + Stream: column 0 section ROW_INDEX start: 70882 length 17 + Stream: column 1 section ROW_INDEX start: 70899 length 180 + Stream: column 2 section ROW_INDEX start: 71079 length 185 + Stream: column 2 section BLOOM_FILTER start: 71264 length 6533 + Stream: column 3 section ROW_INDEX start: 77797 length 88 + Stream: column 1 section DATA start: 77885 length 20035 + Stream: column 2 section DATA start: 97920 length 40050 + Stream: column 3 section DATA start: 137970 length 3532 + Stream: column 3 section LENGTH start: 141502 length 25 + Stream: column 3 section DICTIONARY_DATA start: 141527 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 lengths: 16396 + 
Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 lengths: 11357 Bloom filters for column 2: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4988 loadFactor: 0.5196 expectedFpp: 0.010223193 @@ -95,27 +95,27 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205 - Stripe: offset: 141673 data: 63787 rows: 5000 tail: 85 index: 6971 - Stream: column 0 section ROW_INDEX start: 141673 length 17 - Stream: column 1 section ROW_INDEX start: 141690 length 163 - Stream: column 2 section ROW_INDEX start: 141853 length 168 - Stream: column 2 section BLOOM_FILTER start: 142021 length 6533 - Stream: column 3 section ROW_INDEX start: 148554 length 90 - Stream: column 1 section DATA start: 148644 length 20035 - Stream: column 2 section DATA start: 168679 length 40050 - Stream: column 3 section DATA start: 208729 length 3544 - Stream: column 3 section LENGTH start: 212273 length 25 - Stream: column 3 section DICTIONARY_DATA start: 212298 length 133 + Stripe: offset: 141745 data: 63787 rows: 5000 tail: 85 index: 7005 + Stream: column 0 section ROW_INDEX start: 141745 length 17 + Stream: column 1 section ROW_INDEX start: 141762 length 177 + Stream: column 2 section ROW_INDEX start: 141939 length 184 + Stream: column 2 section BLOOM_FILTER start: 142123 length 6533 + Stream: column 3 section ROW_INDEX start: 148656 length 94 + Stream: column 1 section DATA start: 148750 length 20035 + Stream: column 2 section DATA start: 168785 length 40050 + Stream: column 3 section DATA start: 208835 length 3544 + Stream: column 3 section LENGTH start: 212379 
length 25 + Stream: column 3 section DICTIONARY_DATA start: 212404 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 lengths: 11357 Bloom filters for column 2: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4967 loadFactor: 0.5174 expectedFpp: 0.009925688 Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 @@ -123,27 +123,27 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 0.5135 expectedFpp: 0.009419539 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444 - 
Stripe: offset: 212516 data: 63817 rows: 5000 tail: 85 index: 6964 - Stream: column 0 section ROW_INDEX start: 212516 length 17 - Stream: column 1 section ROW_INDEX start: 212533 length 165 - Stream: column 2 section ROW_INDEX start: 212698 length 167 - Stream: column 2 section BLOOM_FILTER start: 212865 length 6524 - Stream: column 3 section ROW_INDEX start: 219389 length 91 - Stream: column 1 section DATA start: 219480 length 20035 - Stream: column 2 section DATA start: 239515 length 40050 - Stream: column 3 section DATA start: 279565 length 3574 - Stream: column 3 section LENGTH start: 283139 length 25 - Stream: column 3 section DICTIONARY_DATA start: 283164 length 133 + Stripe: offset: 212622 data: 63817 rows: 5000 tail: 85 index: 7000 + Stream: column 0 section ROW_INDEX start: 212622 length 17 + Stream: column 1 section ROW_INDEX start: 212639 length 180 + Stream: column 2 section ROW_INDEX start: 212819 length 182 + Stream: column 2 section BLOOM_FILTER start: 213001 length 6524 + Stream: column 3 section ROW_INDEX start: 219525 length 97 + Stream: column 1 section DATA start: 219622 length 20035 + Stream: column 2 section DATA start: 239657 length 40050 + Stream: column 3 section DATA start: 279707 length 3574 + Stream: column 3 section LENGTH start: 283281 length 25 + Stream: column 3 section DICTIONARY_DATA start: 283306 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false 
min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 lengths: 11357 Bloom filters for column 2: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4951 loadFactor: 0.5157 expectedFpp: 0.009704026 Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4969 loadFactor: 0.5176 expectedFpp: 0.009953696 @@ -151,28 +151,28 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165 - Stripe: offset: 283382 data: 12943 rows: 1000 tail: 78 index: 1468 - Stream: column 0 section ROW_INDEX start: 283382 length 12 - Stream: column 1 section ROW_INDEX start: 283394 length 38 - Stream: column 2 section ROW_INDEX start: 283432 length 41 - Stream: column 2 section BLOOM_FILTER start: 283473 length 1337 - Stream: column 3 section ROW_INDEX start: 284810 length 40 - Stream: column 1 section DATA start: 284850 length 4007 - Stream: column 2 section DATA start: 288857 length 8010 - Stream: column 3 section DATA start: 296867 length 768 - Stream: column 3 section LENGTH start: 297635 length 25 - Stream: column 3 section DICTIONARY_DATA start: 297660 length 133 + Stripe: offset: 283524 data: 
12943 rows: 1000 tail: 78 index: 1480 + Stream: column 0 section ROW_INDEX start: 283524 length 12 + Stream: column 1 section ROW_INDEX start: 283536 length 42 + Stream: column 2 section ROW_INDEX start: 283578 length 45 + Stream: column 2 section BLOOM_FILTER start: 283623 length 1337 + Stream: column 3 section ROW_INDEX start: 284960 length 44 + Stream: column 1 section DATA start: 285004 length 4007 + Stream: column 2 section DATA start: 289011 length 8010 + Stream: column 3 section DATA start: 297021 length 768 + Stream: column 3 section LENGTH start: 297789 length 25 + Stream: column 3 section DICTIONARY_DATA start: 297814 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 lengths: 8010 Bloom filters for column 2: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 -File length: 298416 bytes +File length: 298570 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git orc/src/test/resources/orc-file-dump-dictionary-threshold.out orc/src/test/resources/orc-file-dump-dictionary-threshold.out index 17a964b..5b8ccb1 100644 --- orc/src/test/resources/orc-file-dump-dictionary-threshold.out +++ orc/src/test/resources/orc-file-dump-dictionary-threshold.out @@ -39,151 +39,151 @@ File Statistics: Column 3: count: 21000 hasNull: false min: Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 6910238 Stripes: - Stripe: offset: 3 data: 163602 rows: 5000 tail: 68 index: 720 + Stripe: offset: 3 data: 163585 rows: 5000 tail: 69 index: 783 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 166 - Stream: column 2 section ROW_INDEX start: 186 length 171 - Stream: column 3 section ROW_INDEX start: 357 length 366 - Stream: column 1 section DATA start: 723 length 20035 - Stream: column 2 section DATA start: 20758 length 40050 - Stream: column 3 section DATA start: 60808 length 99226 - Stream: column 3 section LENGTH start: 160034 length 4291 + Stream: column 1 section ROW_INDEX start: 20 length 178 + Stream: column 2 section ROW_INDEX start: 198 length 185 + Stream: column 3 section ROW_INDEX start: 383 length 403 + Stream: column 1 section DATA start: 786 length 20035 + Stream: column 2 section DATA start: 20821 length 40050 + Stream: column 3 section DATA start: 60871 length 99226 + Stream: column 3 section LENGTH start: 160097 length 4274 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2132329551 max: 2145911404 sum: 61941331718 positions: 
0,0,0 - Entry 1: count: 1000 hasNull: false min: -2138433136 max: 2145210552 sum: 14574030042 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2147115959 max: 2137805337 sum: -2032493169 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2137828953 max: 2145877119 sum: -3167202608 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2146452517 max: 2142394906 sum: 88361503212 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2132329551 max: 2145911404 sum: 61941331718 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2138433136 max: 2145210552 sum: 14574030042 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2147115959 max: 2137805337 sum: -2032493169 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2137828953 max: 2145877119 sum: -3167202608 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2146452517 max: 2142394906 sum: 88361503212 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9206837518492372266 max: 9169230975203934579 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9188878639954124284 max: 9213664245516510068 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9211329013123260308 max: 9217851628057711416 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9185745718227889962 max: 9181722705210917931 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9216505819108477308 max: 9196474183833079923 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9206837518492372266 max: 9169230975203934579 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9188878639954124284 max: 9213664245516510068 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9211329013123260308 max: 
9217851628057711416 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9185745718227889962 max: 9181722705210917931 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9216505819108477308 max: 9196474183833079923 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996 sum: 18442 positions: 0,0,0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 sum: 46338 positions: 4767,2058,0,695,18 - Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 sum: 75448 positions: 16464,3340,0,1554,14 - Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 104868 positions: 36532,964,0,2372,90 - Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 136158 positions: 63067,3432,0,3354,108 - Stripe: offset: 164393 data: 368335 rows: 5000 tail: 69 index: 956 - Stream: column 0 section ROW_INDEX start: 164393 length 17 - Stream: column 1 section ROW_INDEX start: 164410 length 157 - Stream: column 2 section ROW_INDEX start: 164567 length 166 - Stream: column 3 section ROW_INDEX start: 164733 length 616 - Stream: column 1 section DATA start: 165349 length 20035 - Stream: column 2 section DATA start: 185384 length 40050 - Stream: 
column 3 section DATA start: 225434 length 302715 - Stream: column 3 section LENGTH start: 528149 length 5535 + Entry 0: count: 1000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996 sum: 18442 positions: 0,0,0,0,0 lengths: 5881,3927 + Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 sum: 46338 positions: 4767,2058,0,695,18 lengths: 12748,3927 + Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 sum: 75448 positions: 16464,3340,0,1554,14 lengths: 21096,3927 + Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 104868 positions: 36532,964,0,2372,90 lengths: 27674,3927 + Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 136158 positions: 63067,3432,0,3354,108 lengths: 36159,4274 + Stripe: offset: 164440 data: 368332 rows: 5000 tail: 69 index: 1012 + Stream: column 0 section ROW_INDEX start: 164440 length 17 + Stream: column 1 section ROW_INDEX start: 164457 length 171 + Stream: column 2 section ROW_INDEX start: 164628 length 181 + Stream: column 3 section ROW_INDEX start: 164809 length 643 + Stream: column 1 section DATA start: 165452 length 20035 + Stream: column 2 section DATA start: 185487 length 40050 + Stream: column 3 section DATA start: 225537 length 302715 + Stream: column 3 section LENGTH start: 528252 length 5532 Encoding column 0: DIRECT 
Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -50979197646 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2143569489 max: 2141223179 sum: 22810066834 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2140649392 max: 2146301701 sum: -31694882346 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2147390285 max: 2146299933 sum: 79371934221 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2145928262 max: 2147224606 sum: -34469378822 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -50979197646 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2143569489 max: 2141223179 sum: 22810066834 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2140649392 max: 2146301701 sum: -31694882346 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2147390285 max: 2146299933 sum: 79371934221 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2145928262 max: 2147224606 sum: -34469378822 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9222178666167296739 max: 9191250610515369723 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9220148577547102875 max: 9213945522531717278 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9220818777591257749 max: 9221301751385928177 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9220031433030423388 max: 9207856144487414148 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9201438531577205959 max: 9212462124593119846 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9222178666167296739 max: 9191250610515369723 positions: 
0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9220148577547102875 max: 9213945522531717278 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9220818777591257749 max: 9221301751385928177 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9220031433030423388 max: 9207856144487414148 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9201438531577205959 max: 9212462124593119846 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726 sum: 166320 positions: 0,0,0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994 sum: 193436 positions: 43833,2480,0,967,90 - Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 sum: 224740 positions: 94117,3404,0,1945,222 - Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 252094 positions: 155111,2864,0,3268,48 - Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 281404 positions: 224570,1006,0,4064,342 - Stripe: offset: 533753 data: 606074 rows: 5000 tail: 69 index: 1427 - Stream: column 0 section ROW_INDEX start: 533753 length 17 - Stream: column 1 section ROW_INDEX start: 533770 length 167 - Stream: column 2 section ROW_INDEX start: 533937 length 168 - Stream: column 3 section 
ROW_INDEX start: 534105 length 1075 - Stream: column 1 section DATA start: 535180 length 20035 - Stream: column 2 section DATA start: 555215 length 40050 - Stream: column 3 section DATA start: 595265 length 540210 - Stream: column 3 section LENGTH start: 1135475 length 5779 + Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726 sum: 166320 positions: 0,0,0,0,0 lengths: 44920,4045 + Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994 sum: 193436 positions: 43833,2480,0,967,90 lengths: 51393,4045 + Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 sum: 224740 positions: 94117,3404,0,1945,222 lengths: 62147,4045 + Entry 3: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 252094 positions: 155111,2864,0,3268,48 lengths: 70637,5532 + Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 281404 positions: 224570,1006,0,4064,342 lengths: 78145,5532 + Stripe: offset: 533853 data: 606071 rows: 5000 tail: 69 index: 1520 + Stream: column 0 section ROW_INDEX start: 533853 length 17 + Stream: column 1 section ROW_INDEX start: 533870 length 179 + Stream: column 2 section ROW_INDEX start: 534049 length 183 + Stream: column 3 section ROW_INDEX start: 534232 length 1141 + Stream: column 1 section DATA start: 535373 length 20035 + Stream: column 2 section DATA start: 555408 length 40050 + Stream: column 3 section DATA start: 595458 length 540210 + Stream: column 3 section LENGTH start: 1135668 length 5776 Encoding column 0: DIRECT Encoding column 
1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2138229212 max: 2144818981 sum: -22823642812 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2145842720 max: 2144179881 sum: -12562754334 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2143045885 max: 2146718321 sum: 82993638644 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2144745617 max: 2146570474 sum: 25138722367 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2140127150 max: 2135081620 sum: 68346511655 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2138229212 max: 2144818981 sum: -22823642812 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2145842720 max: 2144179881 sum: -12562754334 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2143045885 max: 2146718321 sum: 82993638644 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2144745617 max: 2146570474 sum: 25138722367 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2140127150 max: 2135081620 sum: 68346511655 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9204340807292138409 max: 9208698732685326961 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9210480084701091299 max: 9207767402467343058 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9195038026813631215 max: 9199201928563274421 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9215483580266514322 max: 9220102792864959501 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9204340807292138409 max: 9208698732685326961 positions: 0,0,0 lengths: 12297 + 
Entry 1: count: 1000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9210480084701091299 max: 9207767402467343058 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9195038026813631215 max: 9199201928563274421 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9215483580266514322 max: 9220102792864959501 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876 sum: 313880 positions: 0,0,0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964 sum: 349542 positions: 87800,2584,0,1097,28 - Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 sum: 386538 positions: 185635,3966,0,2077,162 - Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 421660 positions: 295550,1384,0,3369,16 - Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 453606 positions: 412768,1156,0,4041,470 - Stripe: offset: 1141323 data: 864001 rows: 5000 tail: 69 index: 1975 - Stream: column 0 section ROW_INDEX start: 1141323 length 17 - Stream: column 1 section ROW_INDEX start: 1141340 length 156 - Stream: column 2 section ROW_INDEX start: 1141496 length 168 - Stream: column 3 section ROW_INDEX start: 1141664 
length 1634 - Stream: column 1 section DATA start: 1143298 length 20035 - Stream: column 2 section DATA start: 1163333 length 40050 - Stream: column 3 section DATA start: 1203383 length 798014 - Stream: column 3 section LENGTH start: 2001397 length 5902 + Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876 sum: 313880 positions: 0,0,0,0,0 lengths: 88925,4084 + Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964 sum: 349542 positions: 87800,2584,0,1097,28 lengths: 
98971,4084 + Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 sum: 386538 positions: 185635,3966,0,2077,162 lengths: 111078,4084 + Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 421660 positions: 295550,1384,0,3369,16 lengths: 118212,5776 + Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 453606 positions: 412768,1156,0,4041,470 lengths: 127442,5776 + Stripe: offset: 1141513 data: 863962 rows: 5000 tail: 69 index: 2046 + Stream: column 0 section ROW_INDEX start: 1141513 length 17 + Stream: column 1 section ROW_INDEX start: 1141530 length 170 + Stream: column 2 section ROW_INDEX start: 1141700 length 184 + Stream: 
column 3 section ROW_INDEX start: 1141884 length 1675 + Stream: column 1 section DATA start: 1143559 length 20035 + Stream: column 2 section DATA start: 1163594 length 40050 + Stream: column 3 section DATA start: 1203644 length 798014 + Stream: column 3 section LENGTH start: 2001658 length 5863 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2145319330 max: 2146998132 sum: -50856753363 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2134288866 max: 2147453086 sum: -17911019023 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2139010804 max: 2144727593 sum: -24993151857 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2145378214 max: 2144098933 sum: -18055164052 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2140494429 max: 2144595861 sum: -41863916235 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2145319330 max: 2146998132 sum: -50856753363 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2134288866 max: 2147453086 sum: -17911019023 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2139010804 max: 2144727593 sum: -24993151857 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2145378214 max: 2144098933 sum: -18055164052 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2140494429 max: 2144595861 sum: -41863916235 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9172774601303513941 max: 9212917101275642143 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9218164880949195469 max: 9222919052987871506 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 12297,6,464 - 
Entry 3: count: 1000 hasNull: false min: -9196276654247395117 max: 9210639275226058005 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9197393848859294562 max: 9208134757538374043 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9172774601303513941 max: 9212917101275642143 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9218164880949195469 max: 9222919052987871506 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9196276654247395117 max: 9210639275226058005 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9197393848859294562 max: 9208134757538374043 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610 sum: 492916 positions: 0,0,0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936 sum: 527290 positions: 139298,1396,0,1077,140 - Entry 2: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 sum: 568274 positions: 286457,302,0,1926,462 - Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 594578 positions: 447943,3328,0,3444,250 - Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 631944 positions: 616471,3986,3778,547,292 - Stripe: offset: 2007368 data: 207295 rows: 1000 tail: 67 index: 841 - Stream: column 0 section ROW_INDEX start: 2007368 length 12 - Stream: column 1 section ROW_INDEX start: 2007380 length 38 - Stream: column 2 section ROW_INDEX start: 2007418 length 41 - Stream: column 3 section ROW_INDEX start: 2007459 length 750 - Stream: column 1 section DATA start: 2008209 length 4007 - Stream: column 2 section DATA start: 2012216 length 8010 - Stream: column 3 section DATA start: 2020226 length 194018 - Stream: column 3 section LENGTH start: 2214244 length 1260 + Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610 sum: 492916 positions: 0,0,0,0,0 lengths: 140457,3778 + Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936 sum: 527290 positions: 139298,1396,0,1077,140 lengths: 148080,3778 + Entry 2: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 sum: 568274 positions: 286457,302,0,1926,462 lengths: 162782,3778 + Entry 3: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 594578 positions: 447943,3328,0,3444,250 lengths: 169680,5863 + Entry 4: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 631944 positions: 616471,3986,3778,547,292 lengths: 181543,2085 + Stripe: offset: 2007590 data: 207282 rows: 1000 tail: 67 index: 862 + Stream: column 0 section ROW_INDEX start: 2007590 length 12 + Stream: column 1 section ROW_INDEX start: 2007602 length 42 + Stream: column 2 section ROW_INDEX start: 2007644 length 45 + Stream: column 3 section ROW_INDEX start: 2007689 length 763 + Stream: column 1 section DATA start: 2008452 length 4007 + Stream: column 2 section DATA start: 2012459 length 8010 + Stream: column 3 section DATA start: 2020469 length 194018 + Stream: column 3 section LENGTH start: 
2214487 length 1247 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 positions: 0,0,0 lengths: 4007 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 positions: 0,0,0 lengths: 8010 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0 + Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0 lengths: 194018,1247 -File length: 2217685 bytes +File length: 2217912 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git orc/src/test/resources/orc-file-dump.json orc/src/test/resources/orc-file-dump.json index bf654a1..702e81f 100644 --- orc/src/test/resources/orc-file-dump.json +++ orc/src/test/resources/orc-file-dump.json @@ -254,7 +254,7 @@ "stripeNumber": 1, "stripeInformation": { "offset": 3, - "indexLength": 970, + "indexLength": 1004, "dataLength": 63770, "footerLength": 90, "rowCount": 5000 @@ -270,60 +270,60 @@ "columnId": 1, "section": "ROW_INDEX", "startOffset": 20, - "length": 167 + "length": 180 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 187, - "length": 171 + "startOffset": 200, + "length": 187 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 358, - "length": 103 + "startOffset": 387, + "length": 108 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 461, + "startOffset": 495, "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 973, + "startOffset": 1007, "length": 20035 }, { "columnId": 2, "section": "DATA", - 
"startOffset": 21008, + "startOffset": 21042, "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 61058, + "startOffset": 61092, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 61075, + "startOffset": 61109, "length": 3510 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 64585, + "startOffset": 64619, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 64610, + "startOffset": 64644, "length": 133 } ], @@ -365,6 +365,10 @@ 0, 0, 0 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -383,6 +387,10 @@ 0, 736, 23 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -401,6 +409,10 @@ 0, 1473, 43 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -419,6 +431,10 @@ 0, 2067, 261 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -437,6 +453,10 @@ 0, 2992, 35 + ], + "lengths": [ + 17, + 3510 ] } ], @@ -494,77 +514,77 @@ { "stripeNumber": 2, "stripeInformation": { - "offset": 64833, - "indexLength": 961, + "offset": 64867, + "indexLength": 995, "dataLength": 63763, - "footerLength": 88, + "footerLength": 90, "rowCount": 5000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 64833, + "startOffset": 64867, "length": 17 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 64850, - "length": 166 + "startOffset": 64884, + "length": 180 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 65016, - "length": 166 + "startOffset": 65064, + "length": 181 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 65182, - "length": 100 + "startOffset": 65245, + "length": 105 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 65282, + "startOffset": 65350, "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 65794, + "startOffset": 65862, "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 85829, + "startOffset": 85897, "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 125879, + "startOffset": 125947, "length": 17 }, { 
"columnId": 3, "section": "DATA", - "startOffset": 125896, + "startOffset": 125964, "length": 3503 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 129399, + "startOffset": 129467, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 129424, + "startOffset": 129492, "length": 133 } ], @@ -606,6 +626,10 @@ 0, 0, 0 + ], + "lengths": [ + 17, + 3503 ] }, { @@ -624,6 +648,10 @@ 0, 746, 11 + ], + "lengths": [ + 17, + 3503 ] }, { @@ -642,6 +670,10 @@ 0, 1430, 95 + ], + "lengths": [ + 17, + 3503 ] }, { @@ -660,6 +692,10 @@ 0, 2239, 23 + ], + "lengths": [ + 17, + 3503 ] }, { @@ -678,6 +714,10 @@ 0, 2994, 17 + ], + "lengths": [ + 17, + 3503 ] } ], @@ -735,77 +775,77 @@ { "stripeNumber": 3, "stripeInformation": { - "offset": 129645, - "indexLength": 962, + "offset": 129715, + "indexLength": 996, "dataLength": 63770, - "footerLength": 91, + "footerLength": 90, "rowCount": 5000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 129645, + "startOffset": 129715, "length": 17 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 129662, - "length": 164 + "startOffset": 129732, + "length": 176 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 129826, - "length": 167 + "startOffset": 129908, + "length": 182 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 129993, - "length": 102 + "startOffset": 130090, + "length": 109 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 130095, + "startOffset": 130199, "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 130607, + "startOffset": 130711, "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 150642, + "startOffset": 150746, "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 190692, + "startOffset": 190796, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 190709, + "startOffset": 190813, "length": 3510 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 
194219, + "startOffset": 194323, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 194244, + "startOffset": 194348, "length": 133 } ], @@ -847,6 +887,10 @@ 0, 0, 0 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -865,6 +909,10 @@ 0, 698, 74 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -883,6 +931,10 @@ 0, 1483, 39 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -901,6 +953,10 @@ 0, 2148, 155 + ], + "lengths": [ + 17, + 3510 ] }, { @@ -919,6 +975,10 @@ 0, 3018, 8 + ], + "lengths": [ + 17, + 3510 ] } ], @@ -976,77 +1036,77 @@ { "stripeNumber": 4, "stripeInformation": { - "offset": 194468, - "indexLength": 973, + "offset": 194571, + "indexLength": 1004, "dataLength": 63756, - "footerLength": 91, + "footerLength": 90, "rowCount": 5000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 194468, + "startOffset": 194571, "length": 17 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 194485, - "length": 166 + "startOffset": 194588, + "length": 179 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 194651, - "length": 171 + "startOffset": 194767, + "length": 184 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 194822, - "length": 107 + "startOffset": 194951, + "length": 112 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 194929, + "startOffset": 195063, "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 195441, + "startOffset": 195575, "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 215476, + "startOffset": 215610, "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 255526, + "startOffset": 255660, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 255543, + "startOffset": 255677, "length": 3496 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 259039, + "startOffset": 259173, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 259064, + "startOffset": 259198, 
"length": 133 } ], @@ -1088,6 +1148,10 @@ 0, 0, 0 + ], + "lengths": [ + 17, + 3496 ] }, { @@ -1106,6 +1170,10 @@ 0, 495, 338 + ], + "lengths": [ + 17, + 3496 ] }, { @@ -1124,6 +1192,10 @@ 0, 1449, 71 + ], + "lengths": [ + 17, + 3496 ] }, { @@ -1142,6 +1214,10 @@ 0, 2207, 59 + ], + "lengths": [ + 17, + 3496 ] }, { @@ -1160,6 +1236,10 @@ 0, 2838, 223 + ], + "lengths": [ + 17, + 3496 ] } ], @@ -1217,77 +1297,77 @@ { "stripeNumber": 5, "stripeInformation": { - "offset": 259288, - "indexLength": 433, + "offset": 259421, + "indexLength": 447, "dataLength": 12943, - "footerLength": 83, + "footerLength": 84, "rowCount": 1000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 259288, + "startOffset": 259421, "length": 12 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 259300, - "length": 38 + "startOffset": 259433, + "length": 42 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 259338, - "length": 41 + "startOffset": 259475, + "length": 45 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 259379, - "length": 41 + "startOffset": 259520, + "length": 47 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 259420, + "startOffset": 259567, "length": 301 }, { "columnId": 1, "section": "DATA", - "startOffset": 259721, + "startOffset": 259868, "length": 4007 }, { "columnId": 2, "section": "DATA", - "startOffset": 263728, + "startOffset": 263875, "length": 8010 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 271738, + "startOffset": 271885, "length": 16 }, { "columnId": 3, "section": "DATA", - "startOffset": 271754, + "startOffset": 271901, "length": 752 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 272506, + "startOffset": 272653, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 272531, + "startOffset": 272678, "length": 133 } ], @@ -1328,6 +1408,10 @@ 0, 0, 0 + ], + "lengths": [ + 16, + 752 ] }], "bloomFilterIndexes": [{ @@ -1348,7 +1432,7 @@ }] } ], - 
"fileLength": 273300, + "fileLength": 273440, "paddingLength": 0, "paddingRatio": 0, "status": "OK" diff --git orc/src/test/resources/orc-file-dump.out orc/src/test/resources/orc-file-dump.out index 70f7fbd..bad59ea 100644 --- orc/src/test/resources/orc-file-dump.out +++ orc/src/test/resources/orc-file-dump.out @@ -39,156 +39,156 @@ File Statistics: Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 Stripes: - Stripe: offset: 3 data: 63786 rows: 5000 tail: 79 index: 439 + Stripe: offset: 3 data: 63786 rows: 5000 tail: 79 index: 473 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 166 - Stream: column 2 section ROW_INDEX start: 186 length 169 - Stream: column 3 section ROW_INDEX start: 355 length 87 - Stream: column 1 section DATA start: 442 length 20035 - Stream: column 2 section DATA start: 20477 length 40050 - Stream: column 3 section DATA start: 60527 length 3543 - Stream: column 3 section LENGTH start: 64070 length 25 - Stream: column 3 section DICTIONARY_DATA start: 64095 length 133 + Stream: column 1 section ROW_INDEX start: 20 length 178 + Stream: column 2 section ROW_INDEX start: 198 length 184 + Stream: column 3 section ROW_INDEX start: 382 length 94 + Stream: column 1 section DATA start: 476 length 20035 + Stream: column 2 section DATA start: 20511 length 40050 + Stream: column 3 section DATA start: 60561 length 3543 + Stream: column 3 section LENGTH start: 64104 length 25 + Stream: column 3 section DICTIONARY_DATA start: 64129 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: 
-25521983511 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: -25521983511 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 lengths: 16396 + Entry 4: 
count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 - Stripe: offset: 64307 data: 63775 rows: 5000 tail: 79 index: 432 - Stream: column 0 section ROW_INDEX start: 64307 length 17 - Stream: column 1 section ROW_INDEX start: 64324 length 164 - Stream: column 2 section ROW_INDEX start: 64488 length 168 - Stream: column 3 section ROW_INDEX start: 64656 length 83 - Stream: column 1 section DATA start: 64739 length 20035 - Stream: column 2 section DATA start: 84774 length 40050 - Stream: column 3 section DATA start: 124824 length 3532 - Stream: column 3 section LENGTH start: 128356 length 25 - Stream: column 3 section DICTIONARY_DATA start: 128381 length 133 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 lengths: 3543 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 lengths: 3543 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 lengths: 3543 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 lengths: 3543 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 lengths: 3543 + Stripe: offset: 64341 data: 63775 rows: 5000 tail: 79 index: 470 + Stream: column 0 section ROW_INDEX start: 64341 length 17 + Stream: column 1 section ROW_INDEX start: 64358 length 180 + Stream: column 2 section ROW_INDEX start: 
64538 length 185 + Stream: column 3 section ROW_INDEX start: 64723 length 88 + Stream: column 1 section DATA start: 64811 length 20035 + Stream: column 2 section DATA start: 84846 length 40050 + Stream: column 3 section DATA start: 124896 length 3532 + Stream: column 3 section LENGTH start: 128428 length 25 + Stream: column 3 section DICTIONARY_DATA start: 128453 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: 
false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 - Stripe: offset: 128593 data: 63787 rows: 5000 tail: 79 index: 438 - Stream: column 0 section ROW_INDEX start: 128593 length 17 - Stream: column 1 section ROW_INDEX start: 128610 length 163 - Stream: column 2 section ROW_INDEX start: 128773 length 168 - Stream: column 3 section ROW_INDEX start: 128941 length 90 - Stream: column 1 section DATA start: 129031 length 20035 - Stream: column 2 section DATA start: 149066 length 40050 - Stream: column 3 section DATA start: 189116 length 3544 - Stream: column 3 section LENGTH start: 192660 length 25 - Stream: 
column 3 section DICTIONARY_DATA start: 192685 length 133 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 lengths: 3532 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 lengths: 3532 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 lengths: 3532 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 lengths: 3532 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 lengths: 3532 + Stripe: offset: 128665 data: 63787 rows: 5000 tail: 79 index: 472 + Stream: column 0 section ROW_INDEX start: 128665 length 17 + Stream: column 1 section ROW_INDEX start: 128682 length 177 + Stream: column 2 section ROW_INDEX start: 128859 length 184 + Stream: column 3 section ROW_INDEX start: 129043 length 94 + Stream: column 1 section DATA start: 129137 length 20035 + Stream: column 2 section DATA start: 149172 length 40050 + Stream: column 3 section DATA start: 189222 length 3544 + Stream: column 3 section LENGTH start: 192766 length 25 + Stream: column 3 section DICTIONARY_DATA start: 192791 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: -7829543271 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: 
-7829543271 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 + Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 - Entry 2: 
count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 - Stripe: offset: 192897 data: 63817 rows: 5000 tail: 79 index: 440 - Stream: column 0 section ROW_INDEX start: 192897 length 17 - Stream: column 1 section ROW_INDEX start: 192914 length 165 - Stream: column 2 section ROW_INDEX start: 193079 length 167 - Stream: column 3 section ROW_INDEX start: 193246 length 91 - Stream: column 1 section DATA start: 193337 length 20035 - Stream: column 2 section DATA start: 213372 length 40050 - Stream: column 3 section DATA start: 253422 length 3574 - Stream: column 3 section LENGTH start: 256996 length 25 - Stream: column 3 section DICTIONARY_DATA start: 257021 length 133 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 lengths: 3544 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 lengths: 3544 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 lengths: 3544 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 lengths: 3544 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 lengths: 3544 + Stripe: offset: 193003 data: 63817 rows: 5000 tail: 79 index: 476 + Stream: column 0 section ROW_INDEX start: 193003 length 17 + Stream: column 1 section ROW_INDEX start: 193020 length 180 + Stream: column 2 section ROW_INDEX start: 193200 length 182 + Stream: column 3 section ROW_INDEX start: 193382 length 97 + Stream: column 1 section DATA start: 193479 length 20035 + Stream: column 2 section DATA start: 213514 length 40050 + Stream: column 3 section DATA start: 253564 length 3574 + Stream: column 3 section LENGTH start: 257138 length 25 + Stream: column 3 
section DICTIONARY_DATA start: 257163 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: -23606674002 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 12297,2062,416 + Entry 0: count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0 lengths: 8198 + Entry 1: count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: -23606674002 positions: 0,2050,488 lengths: 12297 + Entry 2: count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 4099,2054,464 lengths: 12297 + Entry 3: count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 8198,2058,440 lengths: 11837 + Entry 4: count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 12297,2062,416 lengths: 7738 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 + Entry 0: count: 1000 
hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 lengths: 12297 + Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 lengths: 16396 + Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 lengths: 16396 + Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 lengths: 16396 + Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 lengths: 11357 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 - Stripe: offset: 257233 data: 12943 rows: 1000 tail: 71 index: 131 - Stream: column 0 section ROW_INDEX start: 257233 length 12 - Stream: column 1 section ROW_INDEX start: 257245 length 38 - Stream: column 2 section ROW_INDEX start: 257283 length 41 - Stream: column 3 section ROW_INDEX start: 257324 length 40 - Stream: column 1 section DATA start: 257364 length 4007 - Stream: column 2 section DATA start: 261371 length 8010 - Stream: column 3 section DATA start: 269381 length 768 - Stream: column 3 section LENGTH start: 270149 length 25 - Stream: column 3 section DICTIONARY_DATA start: 270174 length 133 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 lengths: 3574 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 lengths: 3574 + Entry 2: count: 1000 hasNull: false min: Darkness, max: 
worst sum: 3909 positions: 0,1485,52 lengths: 3574 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 lengths: 3574 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 lengths: 3574 + Stripe: offset: 257375 data: 12943 rows: 1000 tail: 71 index: 143 + Stream: column 0 section ROW_INDEX start: 257375 length 12 + Stream: column 1 section ROW_INDEX start: 257387 length 42 + Stream: column 2 section ROW_INDEX start: 257429 length 45 + Stream: column 3 section ROW_INDEX start: 257474 length 44 + Stream: column 1 section DATA start: 257518 length 4007 + Stream: column 2 section DATA start: 261525 length 8010 + Stream: column 3 section DATA start: 269535 length 768 + Stream: column 3 section LENGTH start: 270303 length 25 + Stream: column 3 section DICTIONARY_DATA start: 270328 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DICTIONARY_V2[35] Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0 lengths: 4007 Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 lengths: 8010 Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 lengths: 768 -File length: 270923 bytes +File length: 271077 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git 
orc/src/test/resources/orc-file-entry-lengths.json orc/src/test/resources/orc-file-entry-lengths.json new file mode 100644 index 0000000..65b72e6 --- /dev/null +++ orc/src/test/resources/orc-file-entry-lengths.json @@ -0,0 +1,4555 @@ +{ + "fileName": "TestFileDump.testDump.orc", + "fileVersion": "0.12", + "writerVersion": "HIVE_13083", + "numberOfRows": 103424, + "compression": "NONE", + "schemaString": "struct", + "schema": [ + { + "columnId": 0, + "columnType": "STRUCT", + "childColumnNames": [ + "s1", + "s2" + ], + "childColumnIds": [ + 1, + 2 + ] + }, + { + "columnId": 1, + "columnType": "STRING" + }, + { + "columnId": 2, + "columnType": "VARCHAR", + "maxLength": 300 + } + ], + "stripeStatistics": [{ + "stripeNumber": 1, + "columnStatistics": [ + { + "columnId": 0, + "count": 103424, + "hasNull": false + }, + { + "columnId": 1, + "count": 102400, + "hasNull": true, + "min": "00", + "max": "ff", + "totalLength": 204800, + "type": "STRING" + }, + { + "columnId": 2, + "count": 102400, + "hasNull": true, + "min": "0000", + "max": "1023", + "totalLength": 409600, + "type": "STRING" + } + ] + }], + "fileStatistics": [ + { + "columnId": 0, + "count": 103424, + "hasNull": false + }, + { + "columnId": 1, + "count": 102400, + "hasNull": true, + "min": "00", + "max": "ff", + "totalLength": 204800, + "type": "STRING" + }, + { + "columnId": 2, + "count": 102400, + "hasNull": true, + "min": "0000", + "max": "1023", + "totalLength": 409600, + "type": "STRING" + } + ], + "stripes": [{ + "stripeNumber": 1, + "stripeInformation": { + "offset": 3, + "indexLength": 9308, + "dataLength": 514116, + "footerLength": 106, + "rowCount": 103424 + }, + "streams": [ + { + "columnId": 0, + "section": "ROW_INDEX", + "startOffset": 3, + "length": 936 + }, + { + "columnId": 1, + "section": "ROW_INDEX", + "startOffset": 939, + "length": 3827 + }, + { + "columnId": 2, + "section": "ROW_INDEX", + "startOffset": 4766, + "length": 4545 + }, + { + "columnId": 1, + "section": "PRESENT", + 
"startOffset": 9311, + "length": 200 + }, + { + "columnId": 1, + "section": "DATA", + "startOffset": 9511, + "length": 102800 + }, + { + "columnId": 1, + "section": "LENGTH", + "startOffset": 112311, + "length": 4 + }, + { + "columnId": 1, + "section": "DICTIONARY_DATA", + "startOffset": 112315, + "length": 512 + }, + { + "columnId": 2, + "section": "PRESENT", + "startOffset": 112827, + "length": 200 + }, + { + "columnId": 2, + "section": "DATA", + "startOffset": 113027, + "length": 409600 + }, + { + "columnId": 2, + "section": "LENGTH", + "startOffset": 522627, + "length": 800 + } + ], + "encodings": [ + { + "columnId": 0, + "kind": "DIRECT" + }, + { + "columnId": 1, + "kind": "DICTIONARY_V2", + "dictionarySize": 256 + }, + { + "columnId": 2, + "kind": "DIRECT_V2" + } + ], + "indexes": [ + { + "columnId": 1, + "rowGroupIndexes": [ + { + "entryId": 0, + "count": 0, + "hasNull": true, + "min": null, + "max": null, + "totalLength": 0, + "type": "STRING", + "positions": [ + 0, + 0, + 0, + 0, + 0 + ], + "lengths": [ + 2, + 0 + ] + }, + { + "entryId": 1, + "count": 976, + "hasNull": true, + "min": "00", + "max": "ff", + "totalLength": 1952, + "type": "STRING", + "positions": [ + 0, + 125, + 0, + 0, + 0 + ], + "lengths": [ + 4, + 1028 + ] + }, + { + "entryId": 2, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 2, + 122, + 0, + 514, + 464 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 3, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 4, + 117, + 0, + 1542, + 440 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 4, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 6, + 112, + 0, + 2570, + 416 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 5, + "count": 1000, + "hasNull": false, + "min": "00", + "max": 
"ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 8, + 107, + 0, + 3598, + 392 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 6, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 10, + 102, + 0, + 4626, + 368 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 7, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 12, + 97, + 0, + 5654, + 344 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 8, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 14, + 92, + 0, + 6682, + 320 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 9, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 16, + 87, + 0, + 7710, + 296 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 10, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 18, + 82, + 0, + 8738, + 272 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 11, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 20, + 77, + 0, + 9766, + 248 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 12, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 22, + 72, + 0, + 10794, + 224 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 13, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 24, + 67, + 0, + 11822, + 200 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 14, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + 
"totalLength": 2000, + "type": "STRING", + "positions": [ + 26, + 62, + 0, + 12850, + 176 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 15, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 28, + 57, + 0, + 13878, + 152 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 16, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 30, + 52, + 0, + 14906, + 128 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 17, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 32, + 47, + 0, + 15934, + 104 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 18, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 34, + 42, + 0, + 16962, + 80 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 19, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 36, + 37, + 0, + 17990, + 56 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 20, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 38, + 32, + 0, + 19018, + 32 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 21, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 40, + 27, + 0, + 20046, + 8 + ], + "lengths": [ + 4, + 1028 + ] + }, + { + "entryId": 22, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 42, + 22, + 0, + 20560, + 496 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 23, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 
2000, + "type": "STRING", + "positions": [ + 44, + 17, + 0, + 21588, + 472 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 24, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 46, + 12, + 0, + 22616, + 448 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 25, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 48, + 7, + 0, + 23644, + 424 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 26, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 50, + 2, + 0, + 24672, + 400 + ], + "lengths": [ + 2, + 1542 + ] + }, + { + "entryId": 27, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 50, + 127, + 0, + 25700, + 376 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 28, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 52, + 122, + 0, + 26728, + 352 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 29, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 54, + 117, + 0, + 27756, + 328 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 30, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 56, + 112, + 0, + 28784, + 304 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 31, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 58, + 107, + 0, + 29812, + 280 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 32, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + 
"type": "STRING", + "positions": [ + 60, + 102, + 0, + 30840, + 256 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 33, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 62, + 97, + 0, + 31868, + 232 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 34, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 64, + 92, + 0, + 32896, + 208 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 35, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 66, + 87, + 0, + 33924, + 184 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 36, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 68, + 82, + 0, + 34952, + 160 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 37, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 70, + 77, + 0, + 35980, + 136 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 38, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 72, + 72, + 0, + 37008, + 112 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 39, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 74, + 67, + 0, + 38036, + 88 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 40, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 76, + 62, + 0, + 39064, + 64 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 41, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": 
"STRING", + "positions": [ + 78, + 57, + 0, + 40092, + 40 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 42, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 80, + 52, + 0, + 41120, + 16 + ], + "lengths": [ + 4, + 1028 + ] + }, + { + "entryId": 43, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 82, + 47, + 0, + 41634, + 504 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 44, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 84, + 42, + 0, + 42662, + 480 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 45, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 86, + 37, + 0, + 43690, + 456 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 46, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 88, + 32, + 0, + 44718, + 432 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 47, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 90, + 27, + 0, + 45746, + 408 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 48, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 92, + 22, + 0, + 46774, + 384 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 49, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 94, + 17, + 0, + 47802, + 360 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 50, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + 
"positions": [ + 96, + 12, + 0, + 48830, + 336 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 51, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 98, + 7, + 0, + 49858, + 312 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 52, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 100, + 2, + 0, + 50886, + 288 + ], + "lengths": [ + 2, + 1542 + ] + }, + { + "entryId": 53, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 100, + 127, + 0, + 51914, + 264 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 54, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 102, + 122, + 0, + 52942, + 240 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 55, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 104, + 117, + 0, + 53970, + 216 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 56, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 106, + 112, + 0, + 54998, + 192 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 57, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 108, + 107, + 0, + 56026, + 168 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 58, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 110, + 102, + 0, + 57054, + 144 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 59, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + 
"positions": [ + 112, + 97, + 0, + 58082, + 120 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 60, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 114, + 92, + 0, + 59110, + 96 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 61, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 116, + 87, + 0, + 60138, + 72 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 62, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 118, + 82, + 0, + 61166, + 48 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 63, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 120, + 77, + 0, + 62194, + 24 + ], + "lengths": [ + 4, + 1028 + ] + }, + { + "entryId": 64, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 122, + 72, + 0, + 63222, + 0 + ], + "lengths": [ + 4, + 1028 + ] + }, + { + "entryId": 65, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 124, + 67, + 0, + 63736, + 488 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 66, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 126, + 62, + 0, + 64764, + 464 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 67, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 128, + 57, + 0, + 65792, + 440 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 68, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + 
"positions": [ + 130, + 52, + 0, + 66820, + 416 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 69, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 132, + 47, + 0, + 67848, + 392 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 70, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 134, + 42, + 0, + 68876, + 368 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 71, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 136, + 37, + 0, + 69904, + 344 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 72, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 138, + 32, + 0, + 70932, + 320 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 73, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 140, + 27, + 0, + 71960, + 296 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 74, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 142, + 22, + 0, + 72988, + 272 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 75, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 144, + 17, + 0, + 74016, + 248 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 76, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 146, + 12, + 0, + 75044, + 224 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 77, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + 
"positions": [ + 148, + 7, + 0, + 76072, + 200 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 78, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 150, + 2, + 0, + 77100, + 176 + ], + "lengths": [ + 2, + 1542 + ] + }, + { + "entryId": 79, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 150, + 127, + 0, + 78128, + 152 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 80, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 152, + 122, + 0, + 79156, + 128 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 81, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 154, + 117, + 0, + 80184, + 104 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 82, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 156, + 112, + 0, + 81212, + 80 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 83, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 158, + 107, + 0, + 82240, + 56 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 84, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 160, + 102, + 0, + 83268, + 32 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 85, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 162, + 97, + 0, + 84296, + 8 + ], + "lengths": [ + 4, + 1028 + ] + }, + { + "entryId": 86, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + 
"positions": [ + 164, + 92, + 0, + 84810, + 496 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 87, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 166, + 87, + 0, + 85838, + 472 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 88, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 168, + 82, + 0, + 86866, + 448 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 89, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 170, + 77, + 0, + 87894, + 424 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 90, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 172, + 72, + 0, + 88922, + 400 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 91, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 174, + 67, + 0, + 89950, + 376 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 92, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 176, + 62, + 0, + 90978, + 352 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 93, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 178, + 57, + 0, + 92006, + 328 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 94, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 180, + 52, + 0, + 93034, + 304 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 95, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + 
"positions": [ + 182, + 47, + 0, + 94062, + 280 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 96, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 184, + 42, + 0, + 95090, + 256 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 97, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 186, + 37, + 0, + 96118, + 232 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 98, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 188, + 32, + 0, + 97146, + 208 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 99, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 190, + 27, + 0, + 98174, + 184 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 100, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 192, + 22, + 0, + 99202, + 160 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 101, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 194, + 17, + 0, + 100230, + 136 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 102, + "count": 1000, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 2000, + "type": "STRING", + "positions": [ + 196, + 12, + 0, + 101258, + 112 + ], + "lengths": [ + 4, + 1542 + ] + }, + { + "entryId": 103, + "count": 424, + "hasNull": false, + "min": "00", + "max": "ff", + "totalLength": 848, + "type": "STRING", + "positions": [ + 198, + 7, + 0, + 102286, + 88 + ], + "lengths": [ + 2, + 514 + ] + } + ] + }, + { + "columnId": 2, + "rowGroupIndexes": [ + { + "entryId": 0, + "count": 0, + "hasNull": true, + "min": null, + "max": 
null, + "totalLength": 0, + "type": "STRING", + "positions": [ + 0, + 0, + 0, + 0, + 0, + 0 + ], + "lengths": [ + 2, + 0, + 0 + ] + }, + { + "entryId": 1, + "count": 976, + "hasNull": true, + "min": "0000", + "max": "0975", + "totalLength": 3904, + "type": "STRING", + "positions": [ + 0, + 125, + 0, + 0, + 0, + 0 + ], + "lengths": [ + 4, + 3904, + 8 + ] + }, + { + "entryId": 2, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 2, + 122, + 0, + 3904, + 4, + 464 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 3, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 4, + 117, + 0, + 7904, + 12, + 440 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 4, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 6, + 112, + 0, + 11904, + 20, + 416 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 5, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 8, + 107, + 0, + 15904, + 28, + 392 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 6, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 10, + 102, + 0, + 19904, + 36, + 368 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 7, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 12, + 97, + 0, + 23904, + 44, + 344 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 8, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 14, + 92, + 0, + 27904, + 52, + 320 + ], + "lengths": [ + 4, + 4000, + 
12 + ] + }, + { + "entryId": 9, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 16, + 87, + 0, + 31904, + 60, + 296 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 10, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 18, + 82, + 0, + 35904, + 68, + 272 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 11, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 20, + 77, + 0, + 39904, + 76, + 248 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 12, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 22, + 72, + 0, + 43904, + 84, + 224 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 13, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 24, + 67, + 0, + 47904, + 92, + 200 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 14, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 26, + 62, + 0, + 51904, + 100, + 176 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 15, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 28, + 57, + 0, + 55904, + 108, + 152 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 16, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 30, + 52, + 0, + 59904, + 116, + 128 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 17, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": 
"1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 32, + 47, + 0, + 63904, + 124, + 104 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 18, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 34, + 42, + 0, + 67904, + 132, + 80 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 19, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 36, + 37, + 0, + 71904, + 140, + 56 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 20, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 38, + 32, + 0, + 75904, + 148, + 32 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 21, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 40, + 27, + 0, + 79904, + 156, + 8 + ], + "lengths": [ + 4, + 4000, + 8 + ] + }, + { + "entryId": 22, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 42, + 22, + 0, + 83904, + 160, + 496 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 23, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 44, + 17, + 0, + 87904, + 168, + 472 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 24, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 46, + 12, + 0, + 91904, + 176, + 448 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 25, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 48, + 7, + 0, + 95904, + 184, + 
424 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 26, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 50, + 2, + 0, + 99904, + 192, + 400 + ], + "lengths": [ + 2, + 4000, + 12 + ] + }, + { + "entryId": 27, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 50, + 127, + 0, + 103904, + 200, + 376 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 28, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 52, + 122, + 0, + 107904, + 208, + 352 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 29, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 54, + 117, + 0, + 111904, + 216, + 328 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 30, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 56, + 112, + 0, + 115904, + 224, + 304 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 31, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 58, + 107, + 0, + 119904, + 232, + 280 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 32, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 60, + 102, + 0, + 123904, + 240, + 256 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 33, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 62, + 97, + 0, + 127904, + 248, + 232 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 34, + "count": 
1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 64, + 92, + 0, + 131904, + 256, + 208 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 35, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 66, + 87, + 0, + 135904, + 264, + 184 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 36, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 68, + 82, + 0, + 139904, + 272, + 160 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 37, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 70, + 77, + 0, + 143904, + 280, + 136 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 38, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 72, + 72, + 0, + 147904, + 288, + 112 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 39, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 74, + 67, + 0, + 151904, + 296, + 88 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 40, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 76, + 62, + 0, + 155904, + 304, + 64 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 41, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 78, + 57, + 0, + 159904, + 312, + 40 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 42, + "count": 1000, + "hasNull": false, + "min": "0016", + "max": "1015", + "totalLength": 4000, + 
"type": "STRING", + "positions": [ + 80, + 52, + 0, + 163904, + 320, + 16 + ], + "lengths": [ + 4, + 4000, + 8 + ] + }, + { + "entryId": 43, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 82, + 47, + 0, + 167904, + 324, + 504 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 44, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 84, + 42, + 0, + 171904, + 332, + 480 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 45, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 86, + 37, + 0, + 175904, + 340, + 456 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 46, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 88, + 32, + 0, + 179904, + 348, + 432 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 47, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 90, + 27, + 0, + 183904, + 356, + 408 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 48, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 92, + 22, + 0, + 187904, + 364, + 384 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 49, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 94, + 17, + 0, + 191904, + 372, + 360 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 50, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 96, + 12, + 0, + 195904, + 380, + 336 + ], + 
"lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 51, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 98, + 7, + 0, + 199904, + 388, + 312 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 52, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 100, + 2, + 0, + 203904, + 396, + 288 + ], + "lengths": [ + 2, + 4000, + 12 + ] + }, + { + "entryId": 53, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 100, + 127, + 0, + 207904, + 404, + 264 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 54, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 102, + 122, + 0, + 211904, + 412, + 240 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 55, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 104, + 117, + 0, + 215904, + 420, + 216 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 56, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 106, + 112, + 0, + 219904, + 428, + 192 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 57, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 108, + 107, + 0, + 223904, + 436, + 168 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 58, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 110, + 102, + 0, + 227904, + 444, + 144 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 59, + "count": 
1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 112, + 97, + 0, + 231904, + 452, + 120 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 60, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 114, + 92, + 0, + 235904, + 460, + 96 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 61, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 116, + 87, + 0, + 239904, + 468, + 72 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 62, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 118, + 82, + 0, + 243904, + 476, + 48 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 63, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 120, + 77, + 0, + 247904, + 484, + 24 + ], + "lengths": [ + 4, + 4000, + 8 + ] + }, + { + "entryId": 64, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 122, + 72, + 0, + 251904, + 492, + 0 + ], + "lengths": [ + 4, + 4000, + 8 + ] + }, + { + "entryId": 65, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 124, + 67, + 0, + 255904, + 496, + 488 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 66, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 126, + 62, + 0, + 259904, + 504, + 464 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 67, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + 
"type": "STRING", + "positions": [ + 128, + 57, + 0, + 263904, + 512, + 440 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 68, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 130, + 52, + 0, + 267904, + 520, + 416 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 69, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 132, + 47, + 0, + 271904, + 528, + 392 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 70, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 134, + 42, + 0, + 275904, + 536, + 368 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 71, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 136, + 37, + 0, + 279904, + 544, + 344 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 72, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 138, + 32, + 0, + 283904, + 552, + 320 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 73, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 140, + 27, + 0, + 287904, + 560, + 296 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 74, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 142, + 22, + 0, + 291904, + 568, + 272 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 75, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 144, + 17, + 0, + 295904, + 576, + 248 + 
], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 76, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 146, + 12, + 0, + 299904, + 584, + 224 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 77, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 148, + 7, + 0, + 303904, + 592, + 200 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 78, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 150, + 2, + 0, + 307904, + 600, + 176 + ], + "lengths": [ + 2, + 4000, + 12 + ] + }, + { + "entryId": 79, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 150, + 127, + 0, + 311904, + 608, + 152 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 80, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 152, + 122, + 0, + 315904, + 616, + 128 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 81, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 154, + 117, + 0, + 319904, + 624, + 104 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 82, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 156, + 112, + 0, + 323904, + 632, + 80 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 83, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 158, + 107, + 0, + 327904, + 640, + 56 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 84, + "count": 
1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 160, + 102, + 0, + 331904, + 648, + 32 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 85, + "count": 1000, + "hasNull": false, + "min": "0008", + "max": "1007", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 162, + 97, + 0, + 335904, + 656, + 8 + ], + "lengths": [ + 4, + 4000, + 8 + ] + }, + { + "entryId": 86, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 164, + 92, + 0, + 339904, + 660, + 496 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 87, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 166, + 87, + 0, + 343904, + 668, + 472 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 88, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 168, + 82, + 0, + 347904, + 676, + 448 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 89, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 170, + 77, + 0, + 351904, + 684, + 424 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 90, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 172, + 72, + 0, + 355904, + 692, + 400 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 91, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 174, + 67, + 0, + 359904, + 700, + 376 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 92, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, 
+ "type": "STRING", + "positions": [ + 176, + 62, + 0, + 363904, + 708, + 352 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 93, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 178, + 57, + 0, + 367904, + 716, + 328 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 94, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 180, + 52, + 0, + 371904, + 724, + 304 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 95, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 182, + 47, + 0, + 375904, + 732, + 280 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 96, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 184, + 42, + 0, + 379904, + 740, + 256 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 97, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 186, + 37, + 0, + 383904, + 748, + 232 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 98, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 188, + 32, + 0, + 387904, + 756, + 208 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 99, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 190, + 27, + 0, + 391904, + 764, + 184 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 100, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 192, + 22, + 0, + 395904, + 772, + 160 
+ ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 101, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 194, + 17, + 0, + 399904, + 780, + 136 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 102, + "count": 1000, + "hasNull": false, + "min": "0000", + "max": "1023", + "totalLength": 4000, + "type": "STRING", + "positions": [ + 196, + 12, + 0, + 403904, + 788, + 112 + ], + "lengths": [ + 4, + 4000, + 12 + ] + }, + { + "entryId": 103, + "count": 424, + "hasNull": false, + "min": "0600", + "max": "1023", + "totalLength": 1696, + "type": "STRING", + "positions": [ + 198, + 7, + 0, + 407904, + 796, + 88 + ], + "lengths": [ + 2, + 1696, + 4 + ] + } + ] + } + ] + }], + "fileLength": 523724, + "paddingLength": 0, + "paddingRatio": 0, + "status": "OK" +} diff --git orc/src/test/resources/orc-file-has-null.out orc/src/test/resources/orc-file-has-null.out index e98a73f..c47651e 100644 --- orc/src/test/resources/orc-file-has-null.out +++ orc/src/test/resources/orc-file-has-null.out @@ -29,83 +29,83 @@ File Statistics: Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000 Stripes: - Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154 + Stripe: offset: 3 data: 220 rows: 5000 tail: 64 index: 172 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 60 - Stream: column 2 section ROW_INDEX start: 80 length 77 - Stream: column 1 section DATA start: 157 length 159 - Stream: column 1 section LENGTH start: 316 length 15 - Stream: column 2 section PRESENT start: 331 length 13 - Stream: column 2 section DATA start: 344 length 18 - Stream: column 2 section LENGTH start: 362 length 6 - Stream: column 2 section DICTIONARY_DATA start: 368 length 9 + Stream: column 1 section ROW_INDEX start: 20 length 73 + Stream: column 2 section ROW_INDEX start: 93 length 82 + Stream: column 1 section DATA start: 175 
length 159 + Stream: column 1 section LENGTH start: 334 length 15 + Stream: column 2 section PRESENT start: 349 length 13 + Stream: column 2 section DATA start: 362 length 18 + Stream: column 2 section LENGTH start: 380 length 6 + Stream: column 2 section DICTIONARY_DATA start: 386 length 9 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[2] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488 - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488 - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488 - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488 - Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116 - Stream: column 0 section ROW_INDEX start: 442 length 17 - Stream: column 1 section ROW_INDEX start: 459 length 60 - Stream: column 2 section ROW_INDEX start: 519 length 39 - Stream: column 1 section DATA start: 558 length 159 - Stream: column 1 section LENGTH start: 717 length 15 - Stream: column 2 section PRESENT start: 732 length 11 - Stream: column 2 section DATA start: 743 length 0 - Stream: column 2 section LENGTH start: 743 length 0 - Stream: column 2 section DICTIONARY_DATA start: 743 length 0 + Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0 lengths: 13,18 + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488 lengths: 13,18 + Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488 lengths: 13,18 + Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488 lengths: 13,18 + Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488 lengths: 13,18 + Stripe: offset: 459 data: 185 rows: 5000 tail: 64 index: 133 + Stream: column 0 section ROW_INDEX start: 459 length 17 + Stream: column 1 section ROW_INDEX start: 476 length 73 + 
Stream: column 2 section ROW_INDEX start: 549 length 43 + Stream: column 1 section DATA start: 592 length 159 + Stream: column 1 section LENGTH start: 751 length 15 + Stream: column 2 section PRESENT start: 766 length 11 + Stream: column 2 section DATA start: 777 length 0 + Stream: column 2 section LENGTH start: 777 length 0 + Stream: column 2 section DICTIONARY_DATA start: 777 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] Row group indices for column 2: - Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0 - Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 - Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 - Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137 - Stream: column 0 section ROW_INDEX start: 807 length 17 - Stream: column 1 section ROW_INDEX start: 824 length 60 - Stream: column 2 section ROW_INDEX start: 884 length 60 - Stream: column 1 section DATA start: 944 length 159 - Stream: column 1 section LENGTH start: 1103 length 15 - Stream: column 2 section DATA start: 1118 length 15 - Stream: column 2 section LENGTH start: 1133 length 6 - Stream: column 2 section DICTIONARY_DATA start: 1139 length 11 + Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0 lengths: 11,0 + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0 lengths: 11,0 + Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 lengths: 11,0 + Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 lengths: 11,0 + Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 lengths: 11,0 + Stripe: offset: 841 data: 206 rows: 5000 tail: 62 index: 154 + Stream: column 0 section ROW_INDEX start: 841 length 17 + Stream: column 1 section ROW_INDEX start: 858 length 73 + Stream: column 2 section ROW_INDEX start: 931 length 64 + Stream: column 1 section DATA start: 995 length 
159 + Stream: column 1 section LENGTH start: 1154 length 15 + Stream: column 2 section DATA start: 1169 length 15 + Stream: column 2 section LENGTH start: 1184 length 6 + Stream: column 2 section DICTIONARY_DATA start: 1190 length 11 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[1] Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488 - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464 - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440 - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416 - Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116 - Stream: column 0 section ROW_INDEX start: 1210 length 17 - Stream: column 1 section ROW_INDEX start: 1227 length 60 - Stream: column 2 section ROW_INDEX start: 1287 length 39 - Stream: column 1 section DATA start: 1326 length 159 - Stream: column 1 section LENGTH start: 1485 length 15 - Stream: column 2 section PRESENT start: 1500 length 11 - Stream: column 2 section DATA start: 1511 length 0 - Stream: column 2 section LENGTH start: 1511 length 0 - Stream: column 2 section DICTIONARY_DATA start: 1511 length 0 + Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0 lengths: 15 + Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488 lengths: 15 + Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464 lengths: 15 + Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440 lengths: 15 + Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416 lengths: 15 + Stripe: offset: 1263 data: 185 rows: 5000 tail: 
64 index: 133 + Stream: column 0 section ROW_INDEX start: 1263 length 17 + Stream: column 1 section ROW_INDEX start: 1280 length 73 + Stream: column 2 section ROW_INDEX start: 1353 length 43 + Stream: column 1 section DATA start: 1396 length 159 + Stream: column 1 section LENGTH start: 1555 length 15 + Stream: column 2 section PRESENT start: 1570 length 11 + Stream: column 2 section DATA start: 1581 length 0 + Stream: column 2 section LENGTH start: 1581 length 0 + Stream: column 2 section DICTIONARY_DATA start: 1581 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] Row group indices for column 2: - Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0 - Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 - Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 - Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 + Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0 lengths: 11,0 + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0 lengths: 11,0 + Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 lengths: 11,0 + Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 lengths: 11,0 + Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 lengths: 11,0 -File length: 1823 bytes +File length: 1892 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java index dad35e3..0a53a00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java @@ -146,8 +146,8 @@ public ColumnReadContext(int colIx, OrcProto.ColumnEncoding encoding, /** Column 
index in the file. */ int colIx; - public void addStream(long offset, OrcProto.Stream stream, int indexIx) { - streams[streamCount++] = new StreamContext(stream, offset, indexIx); + public void addStream(long offset, OrcProto.Stream stream, int indexIx, int streamPosition) { + streams[streamCount++] = new StreamContext(stream, offset, indexIx, streamPosition); } @Override @@ -168,16 +168,19 @@ public String toString() { } private static final class StreamContext { - public StreamContext(OrcProto.Stream stream, long streamOffset, int streamIndexOffset) { + public StreamContext(OrcProto.Stream stream, long streamOffset, int streamIndexOffset, + int streamPosition) { this.kind = stream.getKind(); this.length = stream.getLength(); this.offset = streamOffset; this.streamIndexOffset = streamIndexOffset; + this.streamPosition = streamPosition; } /** Offsets of each stream in the column. */ public long offset, length; public int streamIndexOffset; + public final int streamPosition; public OrcProto.Stream.Kind kind; /** Iterators for the buffers; used to maintain position in per-rg reading. 
*/ DiskRangeList bufferIter; @@ -191,6 +194,7 @@ public String toString() { sb.append(" offset: ").append(offset); sb.append(" length: ").append(length); sb.append(" index_offset: ").append(streamIndexOffset); + sb.append(" index_stream: ").append(streamPosition); return sb.toString(); } } @@ -251,7 +255,9 @@ public void readEncodedColumns(int stripeIx, StripeInformation stripe, } int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]); - ctx.addStream(offset, stream, indexIx); + int streamPosition = RecordReaderUtils.getLengthPosition(ctx.encoding.getKind(), + types.get(colIx).getKind(), streamKind, hasNull[colIx]); + ctx.addStream(offset, stream, indexIx, streamPosition); if (isTracingEnabled) { LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx); @@ -384,10 +390,15 @@ public void readEncodedColumns(int stripeIx, StripeInformation stripe, // Offset relative to the beginning of the stream of where this RG ends. long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset); - // Offset before which this RG is guaranteed to end. Can only be estimated. - // We estimate the same way for compressed and uncompressed for now. - long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset( - isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize); + // Offset before which this RG is guaranteed to end. + long endCOffset; + if (index.getLengthsCount() != 0) { + endCOffset = cOffset + index.getLengths(sctx.streamPosition); + } else { + endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset( + isCompressed, isLastRg, nextCOffsetRel, sctx.length, + bufferSize); + } // As we read, we can unlock initial refcounts for the buffers that end before // the data that we need for this RG. 
long unlockUntilCOffset = sctx.offset + nextCOffsetRel; diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 4eb0249..5ed36b0 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -2102,14 +2102,14 @@ public void testCombinationInputFormatWithAcid() throws Exception { assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString()); assertEquals(0, split.getStart()); - assertEquals(607, split.getLength()); + assertEquals(630, split.getLength()); split = (HiveInputFormat.HiveInputSplit) splits[1]; assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName()); assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString()); assertEquals(0, split.getStart()); - assertEquals(629, split.getLength()); + assertEquals(652, split.getLength()); CombineHiveInputFormat.CombineHiveInputSplit combineSplit = (CombineHiveInputFormat.CombineHiveInputSplit) splits[2]; assertEquals(BUCKETS, combineSplit.getNumPaths()); @@ -2117,7 +2117,7 @@ public void testCombinationInputFormatWithAcid() throws Exception { assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0", combineSplit.getPath(bucket).toString()); assertEquals(0, combineSplit.getOffset(bucket)); - assertEquals(241, combineSplit.getLength(bucket)); + assertEquals(247, combineSplit.getLength(bucket)); } String[] hosts = combineSplit.getLocations(); assertEquals(2, hosts.length);