diff --git ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java index 5523e97..5faba77 100644 --- ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java +++ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java @@ -8033,6 +8033,1230 @@ public Builder removeEntry(int index) { // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.RowIndex) } + public interface BloomFilterOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional uint32 numHashFunctions = 1; + /** + * optional uint32 numHashFunctions = 1; + */ + boolean hasNumHashFunctions(); + /** + * optional uint32 numHashFunctions = 1; + */ + int getNumHashFunctions(); + + // repeated fixed64 bitset = 2; + /** + * repeated fixed64 bitset = 2; + */ + java.util.List getBitsetList(); + /** + * repeated fixed64 bitset = 2; + */ + int getBitsetCount(); + /** + * repeated fixed64 bitset = 2; + */ + long getBitset(int index); + } + /** + * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.BloomFilter} + */ + public static final class BloomFilter extends + com.google.protobuf.GeneratedMessage + implements BloomFilterOrBuilder { + // Use BloomFilter.newBuilder() to construct. + private BloomFilter(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private BloomFilter(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final BloomFilter defaultInstance; + public static BloomFilter getDefaultInstance() { + return defaultInstance; + } + + public BloomFilter getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private BloomFilter( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + numHashFunctions_ = input.readUInt32(); + break; + } + case 17: { + if (!((mutable_bitField0_ & 0x00000002) == 0x00000002)) { + bitset_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000002; + } + bitset_.add(input.readFixed64()); + break; + } + case 18: { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000002) == 0x00000002) && input.getBytesUntilLimit() > 0) { + bitset_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000002; + } + while (input.getBytesUntilLimit() > 0) { + bitset_.add(input.readFixed64()); + } + input.popLimit(limit); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } 
finally { + if (((mutable_bitField0_ & 0x00000002) == 0x00000002)) { + bitset_ = java.util.Collections.unmodifiableList(bitset_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public BloomFilter parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new BloomFilter(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // optional uint32 numHashFunctions = 1; + public static final int NUMHASHFUNCTIONS_FIELD_NUMBER = 1; + private int numHashFunctions_; + /** + * optional uint32 numHashFunctions = 1; + */ + public boolean hasNumHashFunctions() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional uint32 numHashFunctions = 1; + */ + public int getNumHashFunctions() { + return numHashFunctions_; + } + + // repeated fixed64 bitset = 2; + public static final int BITSET_FIELD_NUMBER = 2; + private java.util.List bitset_; + /** + * repeated fixed64 bitset = 2; + */ + public java.util.List + getBitsetList() { + return bitset_; + } + /** + * repeated fixed64 bitset = 2; + */ + public int getBitsetCount() { + return bitset_.size(); + } + /** + * repeated fixed64 bitset = 2; + */ + public long getBitset(int index) { + return bitset_.get(index); + } + + private void initFields() { + numHashFunctions_ = 0; + bitset_ = java.util.Collections.emptyList(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeUInt32(1, numHashFunctions_); + } + for (int i = 0; i < bitset_.size(); i++) { + output.writeFixed64(2, bitset_.get(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeUInt32Size(1, numHashFunctions_); + } + { + int dataSize = 0; + dataSize = 8 * getBitsetList().size(); + size += dataSize; + size += 1 * getBitsetList().size(); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() 
+ throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.BloomFilter} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + 
return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder.class); + } + + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + numHashFunctions_ = 0; + bitField0_ = (bitField0_ & ~0x00000001); + bitset_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_descriptor; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter getDefaultInstanceForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.getDefaultInstance(); + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter build() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter buildPartial() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.numHashFunctions_ = numHashFunctions_; + if (((bitField0_ & 0x00000002) == 0x00000002)) { + bitset_ = java.util.Collections.unmodifiableList(bitset_); + bitField0_ = (bitField0_ & ~0x00000002); + } + result.bitset_ = bitset_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter) { + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter other) { + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.getDefaultInstance()) return this; + if (other.hasNumHashFunctions()) { + setNumHashFunctions(other.getNumHashFunctions()); + } + if (!other.bitset_.isEmpty()) { + if (bitset_.isEmpty()) { + bitset_ = other.bitset_; + bitField0_ = (bitField0_ & ~0x00000002); + } else { + ensureBitsetIsMutable(); + bitset_.addAll(other.bitset_); + } + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + 
org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // optional uint32 numHashFunctions = 1; + private int numHashFunctions_ ; + /** + * optional uint32 numHashFunctions = 1; + */ + public boolean hasNumHashFunctions() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional uint32 numHashFunctions = 1; + */ + public int getNumHashFunctions() { + return numHashFunctions_; + } + /** + * optional uint32 numHashFunctions = 1; + */ + public Builder setNumHashFunctions(int value) { + bitField0_ |= 0x00000001; + numHashFunctions_ = value; + onChanged(); + return this; + } + /** + * optional uint32 numHashFunctions = 1; + */ + public Builder clearNumHashFunctions() { + bitField0_ = (bitField0_ & ~0x00000001); + numHashFunctions_ = 0; + onChanged(); + return this; + } + + // repeated fixed64 bitset = 2; + private java.util.List bitset_ = java.util.Collections.emptyList(); + private void ensureBitsetIsMutable() { + if (!((bitField0_ & 0x00000002) == 0x00000002)) { + bitset_ = new java.util.ArrayList(bitset_); + bitField0_ |= 0x00000002; + } + } + /** + * repeated fixed64 bitset = 2; + */ + public java.util.List + getBitsetList() { + return java.util.Collections.unmodifiableList(bitset_); + } + /** + * repeated fixed64 bitset = 2; + */ + public int getBitsetCount() { + return bitset_.size(); + } + /** + * repeated fixed64 bitset = 2; + */ + public long getBitset(int index) { + return bitset_.get(index); + } + /** + * repeated fixed64 bitset = 2; + */ + public Builder setBitset( + int index, long value) { + ensureBitsetIsMutable(); + bitset_.set(index, value); + onChanged(); + return this; + } + /** + * repeated fixed64 bitset = 2; + */ + public Builder addBitset(long value) { + ensureBitsetIsMutable(); + bitset_.add(value); + onChanged(); + return this; + } + /** + * repeated fixed64 bitset = 2; + */ + public Builder addAllBitset( + java.lang.Iterable values) { + ensureBitsetIsMutable(); + super.addAll(values, bitset_); + onChanged(); + return this; + } + /** + * repeated fixed64 bitset = 2; + */ + public Builder clearBitset() { + bitset_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000002); + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.BloomFilter) + } + + static { + defaultInstance = new BloomFilter(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.BloomFilter) + } + + public interface BloomFilterIndexOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + java.util.List + getBloomFilterList(); + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter getBloomFilter(int index); + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + int getBloomFilterCount(); + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter 
bloomFilter = 1; + */ + java.util.List + getBloomFilterOrBuilderList(); + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder getBloomFilterOrBuilder( + int index); + } + /** + * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.BloomFilterIndex} + */ + public static final class BloomFilterIndex extends + com.google.protobuf.GeneratedMessage + implements BloomFilterIndexOrBuilder { + // Use BloomFilterIndex.newBuilder() to construct. + private BloomFilterIndex(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private BloomFilterIndex(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final BloomFilterIndex defaultInstance; + public static BloomFilterIndex getDefaultInstance() { + return defaultInstance; + } + + public BloomFilterIndex getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private BloomFilterIndex( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + bloomFilter_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + bloomFilter_.add(input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.PARSER, extensionRegistry)); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + bloomFilter_ = java.util.Collections.unmodifiableList(bloomFilter_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public BloomFilterIndex parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws 
com.google.protobuf.InvalidProtocolBufferException { + return new BloomFilterIndex(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + // repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + public static final int BLOOMFILTER_FIELD_NUMBER = 1; + private java.util.List bloomFilter_; + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public java.util.List getBloomFilterList() { + return bloomFilter_; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public java.util.List + getBloomFilterOrBuilderList() { + return bloomFilter_; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public int getBloomFilterCount() { + return bloomFilter_.size(); + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter getBloomFilter(int index) { + return bloomFilter_.get(index); + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder getBloomFilterOrBuilder( + int index) { + return bloomFilter_.get(index); + } + + private void initFields() { + bloomFilter_ = java.util.Collections.emptyList(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + for (int i = 0; i < bloomFilter_.size(); i++) { + output.writeMessage(1, bloomFilter_.get(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + for (int i = 0; i < bloomFilter_.size(); i++) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(1, bloomFilter_.get(i)); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + 
public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.BloomFilterIndex} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndexOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.Builder.class); + } + + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + getBloomFilterFieldBuilder(); + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + if (bloomFilterBuilder_ == null) { + bloomFilter_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + } 
else { + bloomFilterBuilder_.clear(); + } + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_descriptor; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex getDefaultInstanceForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.getDefaultInstance(); + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex build() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex buildPartial() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex(this); + int from_bitField0_ = bitField0_; + if (bloomFilterBuilder_ == null) { + if (((bitField0_ & 0x00000001) == 0x00000001)) { + bloomFilter_ = java.util.Collections.unmodifiableList(bloomFilter_); + bitField0_ = (bitField0_ & ~0x00000001); + } + result.bloomFilter_ = bloomFilter_; + } else { + result.bloomFilter_ = bloomFilterBuilder_.build(); + } + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex) { + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex other) { + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex.getDefaultInstance()) return this; + if (bloomFilterBuilder_ == null) { + if (!other.bloomFilter_.isEmpty()) { + if (bloomFilter_.isEmpty()) { + bloomFilter_ = other.bloomFilter_; + bitField0_ = (bitField0_ & ~0x00000001); + } else { + ensureBloomFilterIsMutable(); + bloomFilter_.addAll(other.bloomFilter_); + } + onChanged(); + } + } else { + if (!other.bloomFilter_.isEmpty()) { + if (bloomFilterBuilder_.isEmpty()) { + bloomFilterBuilder_.dispose(); + bloomFilterBuilder_ = null; + bloomFilter_ = other.bloomFilter_; + bitField0_ = (bitField0_ & ~0x00000001); + bloomFilterBuilder_ = + com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders ? 
+ getBloomFilterFieldBuilder() : null; + } else { + bloomFilterBuilder_.addAllMessages(other.bloomFilter_); + } + } + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + private java.util.List bloomFilter_ = + java.util.Collections.emptyList(); + private void ensureBloomFilterIsMutable() { + if (!((bitField0_ & 0x00000001) == 0x00000001)) { + bloomFilter_ = new java.util.ArrayList(bloomFilter_); + bitField0_ |= 0x00000001; + } + } + + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder> bloomFilterBuilder_; + + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public java.util.List getBloomFilterList() { + if (bloomFilterBuilder_ == null) { + return java.util.Collections.unmodifiableList(bloomFilter_); + } else { + return bloomFilterBuilder_.getMessageList(); + } + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public int getBloomFilterCount() { + if (bloomFilterBuilder_ == null) { + return bloomFilter_.size(); + } else { + return bloomFilterBuilder_.getCount(); + } + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter getBloomFilter(int index) { + if (bloomFilterBuilder_ == null) { + return bloomFilter_.get(index); + } else { + return bloomFilterBuilder_.getMessage(index); + } + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder setBloomFilter( + int index, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter value) { + if (bloomFilterBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureBloomFilterIsMutable(); + bloomFilter_.set(index, value); + onChanged(); + } else { + bloomFilterBuilder_.setMessage(index, value); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder setBloomFilter( + int index, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder builderForValue) { + if (bloomFilterBuilder_ == null) { + ensureBloomFilterIsMutable(); + bloomFilter_.set(index, builderForValue.build()); + onChanged(); + } else { + bloomFilterBuilder_.setMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder addBloomFilter(org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter value) { + if (bloomFilterBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); 
+ } + ensureBloomFilterIsMutable(); + bloomFilter_.add(value); + onChanged(); + } else { + bloomFilterBuilder_.addMessage(value); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder addBloomFilter( + int index, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter value) { + if (bloomFilterBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureBloomFilterIsMutable(); + bloomFilter_.add(index, value); + onChanged(); + } else { + bloomFilterBuilder_.addMessage(index, value); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder addBloomFilter( + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder builderForValue) { + if (bloomFilterBuilder_ == null) { + ensureBloomFilterIsMutable(); + bloomFilter_.add(builderForValue.build()); + onChanged(); + } else { + bloomFilterBuilder_.addMessage(builderForValue.build()); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder addBloomFilter( + int index, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder builderForValue) { + if (bloomFilterBuilder_ == null) { + ensureBloomFilterIsMutable(); + bloomFilter_.add(index, builderForValue.build()); + onChanged(); + } else { + bloomFilterBuilder_.addMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder addAllBloomFilter( + java.lang.Iterable values) { + if (bloomFilterBuilder_ == null) { + ensureBloomFilterIsMutable(); + super.addAll(values, bloomFilter_); + onChanged(); + } else { + bloomFilterBuilder_.addAllMessages(values); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder clearBloomFilter() { + if (bloomFilterBuilder_ == null) { + bloomFilter_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + onChanged(); + } else { + bloomFilterBuilder_.clear(); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public Builder removeBloomFilter(int index) { + if (bloomFilterBuilder_ == null) { + ensureBloomFilterIsMutable(); + bloomFilter_.remove(index); + onChanged(); + } else { + bloomFilterBuilder_.remove(index); + } + return this; + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder getBloomFilterBuilder( + int index) { + return getBloomFilterFieldBuilder().getBuilder(index); + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder getBloomFilterOrBuilder( + int index) { + if (bloomFilterBuilder_ == null) { + return bloomFilter_.get(index); } else { + return bloomFilterBuilder_.getMessageOrBuilder(index); + } + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public java.util.List + getBloomFilterOrBuilderList() { + if (bloomFilterBuilder_ != null) { + return bloomFilterBuilder_.getMessageOrBuilderList(); + } else { + return java.util.Collections.unmodifiableList(bloomFilter_); + } + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public 
org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder addBloomFilterBuilder() { + return getBloomFilterFieldBuilder().addBuilder( + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.getDefaultInstance()); + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder addBloomFilterBuilder( + int index) { + return getBloomFilterFieldBuilder().addBuilder( + index, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.getDefaultInstance()); + } + /** + * repeated .org.apache.hadoop.hive.ql.io.orc.BloomFilter bloomFilter = 1; + */ + public java.util.List + getBloomFilterBuilderList() { + return getBloomFilterFieldBuilder().getBuilderList(); + } + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder> + getBloomFilterFieldBuilder() { + if (bloomFilterBuilder_ == null) { + bloomFilterBuilder_ = new com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilter.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterOrBuilder>( + bloomFilter_, + ((bitField0_ & 0x00000001) == 0x00000001), + getParentForChildren(), + isClean()); + bloomFilter_ = null; + } + return bloomFilterBuilder_; + } + + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.BloomFilterIndex) + } + + static { + defaultInstance = new BloomFilterIndex(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.BloomFilterIndex) + } + public interface StreamOrBuilder extends com.google.protobuf.MessageOrBuilder { @@ -8215,6 +9439,10 @@ public Stream parsePartialFrom( * ROW_INDEX = 6; */ ROW_INDEX(6, 6), + /** + * BLOOM_FILTER = 7; + */ + BLOOM_FILTER(7, 7), ; /** @@ -8245,6 +9473,10 @@ public Stream parsePartialFrom( * ROW_INDEX = 6; */ public static final int ROW_INDEX_VALUE = 6; + /** + * BLOOM_FILTER = 7; + */ + public static final int BLOOM_FILTER_VALUE = 7; public final int getNumber() { return value; } @@ -8258,6 +9490,7 @@ public static Kind valueOf(int value) { case 4: return DICTIONARY_COUNT; case 5: return SECONDARY; case 6: return ROW_INDEX; + case 7: return BLOOM_FILTER; default: return null; } } @@ -17668,6 +18901,16 @@ public Builder setMagicBytes( com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_fieldAccessorTable; private static com.google.protobuf.Descriptors.Descriptor + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor; private static com.google.protobuf.GeneratedMessage.FieldAccessorTable @@ -17760,55 +19003,59 @@ public Builder setMagicBytes( "tics\030\002 
\001(\01322.org.apache.hadoop.hive.ql.i" + "o.orc.ColumnStatistics\"J\n\010RowIndex\022>\n\005en" + "try\030\001 \003(\0132/.org.apache.hadoop.hive.ql.io" + - ".orc.RowIndexEntry\"\331\001\n\006Stream\022;\n\004kind\030\001 " + - "\002(\0162-.org.apache.hadoop.hive.ql.io.orc.S" + - "tream.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006length\030\003 \001" + - "(\004\"r\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LEN" + - "GTH\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DICTIONARY" + - "_COUNT\020\004\022\r\n\tSECONDARY\020\005\022\r\n\tROW_INDEX\020\006\"\263", - "\001\n\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.org.ap" + - "ache.hadoop.hive.ql.io.orc.ColumnEncodin" + - "g.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind\022\n" + - "\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V2\020" + - "\002\022\021\n\rDICTIONARY_V2\020\003\"\214\001\n\014StripeFooter\0229\n" + - "\007streams\030\001 \003(\0132(.org.apache.hadoop.hive." + - "ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org." + - "apache.hadoop.hive.ql.io.orc.ColumnEncod" + - "ing\"\370\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache." + - "hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010subty", - "pes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmax" + - "imumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n\005s" + - "cale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE" + - "\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOA" + - "T\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022" + - "\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STR" + - "UCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022" + - "\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInformat" + - "ion\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004" + - "\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001", - "(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadata" + - "Item\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020Str" + - "ipeStatistics\022D\n\010colStats\030\001 \003(\01322.org.ap" + - "ache.hadoop.hive.ql.io.orc.ColumnStatist" + - "ics\"S\n\010Metadata\022G\n\013stripeStats\030\001 \003(\01322.o" + - "rg.apache.hadoop.hive.ql.io.orc.StripeSt" + - "atistics\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 \001(" + - "\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007stripes\030\003 \003(" + - "\01323.org.apache.hadoop.hive.ql.io.orc.Str" + - "ipeInformation\0225\n\005types\030\004 \003(\0132&.org.apac", - "he.hadoop.hive.ql.io.orc.Type\022D\n\010metadat" + - "a\030\005 \003(\01322.org.apache.hadoop.hive.ql.io.o" + - "rc.UserMetadataItem\022\024\n\014numberOfRows\030\006 \001(" + - "\004\022F\n\nstatistics\030\007 \003(\01322.org.apache.hadoo" + - "p.hive.ql.io.orc.ColumnStatistics\022\026\n\016row" + - "IndexStride\030\010 \001(\r\"\334\001\n\nPostScript\022\024\n\014foot" + - "erLength\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621.or" + - "g.apache.hadoop.hive.ql.io.orc.Compressi" + - "onKind\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007" + - "version\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(", - 
"\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t" + - "*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022" + - "\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" + ".orc.RowIndexEntry\"7\n\013BloomFilter\022\030\n\020num" + + "HashFunctions\030\001 \001(\r\022\016\n\006bitset\030\002 \003(\006\"V\n\020B" + + "loomFilterIndex\022B\n\013bloomFilter\030\001 \003(\0132-.o" + + "rg.apache.hadoop.hive.ql.io.orc.BloomFil" + + "ter\"\354\001\n\006Stream\022;\n\004kind\030\001 \002(\0162-.org.apach" + + "e.hadoop.hive.ql.io.orc.Stream.Kind\022\016\n\006c", + "olumn\030\002 \001(\r\022\016\n\006length\030\003 \001(\004\"\204\001\n\004Kind\022\013\n\007" + + "PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LENGTH\020\002\022\023\n\017DICTI" + + "ONARY_DATA\020\003\022\024\n\020DICTIONARY_COUNT\020\004\022\r\n\tSE" + + "CONDARY\020\005\022\r\n\tROW_INDEX\020\006\022\020\n\014BLOOM_FILTER" + + "\020\007\"\263\001\n\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.or" + + "g.apache.hadoop.hive.ql.io.orc.ColumnEnc" + + "oding.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Ki" + + "nd\022\n\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT" + + "_V2\020\002\022\021\n\rDICTIONARY_V2\020\003\"\214\001\n\014StripeFoote" + + "r\0229\n\007streams\030\001 \003(\0132(.org.apache.hadoop.h", + "ive.ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320." + + "org.apache.hadoop.hive.ql.io.orc.ColumnE" + + "ncoding\"\370\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apa" + + "che.hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010s" + + "ubtypes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n" + + "\rmaximumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022" + + "\r\n\005scale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004" + + "BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005" + + "FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINAR" + + "Y\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n", + "\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DAT" + + "E\020\017\022\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInfo" + + "rmation\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002" + + " \001(\004\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength" + + "\030\004 \001(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMeta" + + "dataItem\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n" + + "\020StripeStatistics\022D\n\010colStats\030\001 \003(\01322.or" + + "g.apache.hadoop.hive.ql.io.orc.ColumnSta" + + "tistics\"S\n\010Metadata\022G\n\013stripeStats\030\001 \003(\013" + + "22.org.apache.hadoop.hive.ql.io.orc.Stri", + "peStatistics\"\356\002\n\006Footer\022\024\n\014headerLength\030" + + "\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007stripes\030" + + "\003 \003(\01323.org.apache.hadoop.hive.ql.io.orc" + + ".StripeInformation\0225\n\005types\030\004 \003(\0132&.org." + + "apache.hadoop.hive.ql.io.orc.Type\022D\n\010met" + + "adata\030\005 \003(\01322.org.apache.hadoop.hive.ql." 
+ + "io.orc.UserMetadataItem\022\024\n\014numberOfRows\030" + + "\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org.apache.h" + + "adoop.hive.ql.io.orc.ColumnStatistics\022\026\n" + + "\016rowIndexStride\030\010 \001(\r\"\334\001\n\nPostScript\022\024\n\014", + "footerLength\030\001 \001(\004\022F\n\013compression\030\002 \001(\0162" + + "1.org.apache.hadoop.hive.ql.io.orc.Compr" + + "essionKind\022\034\n\024compressionBlockSize\030\003 \001(\004" + + "\022\023\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030" + + "\005 \001(\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005magic\030\300>" + + " \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLI" + + "B\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -17881,62 +19128,74 @@ public Builder setMagicBytes( com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_descriptor, new java.lang.String[] { "Entry", }); - internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor = + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_descriptor = getDescriptor().getMessageTypes().get(11); + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilter_descriptor, + new java.lang.String[] { "NumHashFunctions", "Bitset", }); + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_descriptor = + getDescriptor().getMessageTypes().get(12); + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_org_apache_hadoop_hive_ql_io_orc_BloomFilterIndex_descriptor, + new java.lang.String[] { "BloomFilter", }); + internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor = + getDescriptor().getMessageTypes().get(13); internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor, new java.lang.String[] { "Kind", "Column", "Length", }); internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_descriptor = - getDescriptor().getMessageTypes().get(12); + getDescriptor().getMessageTypes().get(14); internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_descriptor, new java.lang.String[] { "Kind", "DictionarySize", }); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_descriptor = - getDescriptor().getMessageTypes().get(13); + getDescriptor().getMessageTypes().get(15); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_descriptor, new java.lang.String[] { "Streams", "Columns", }); internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor = - getDescriptor().getMessageTypes().get(14); + getDescriptor().getMessageTypes().get(16); internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new 
com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor, new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", "Precision", "Scale", }); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor = - getDescriptor().getMessageTypes().get(15); + getDescriptor().getMessageTypes().get(17); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor, new java.lang.String[] { "Offset", "IndexLength", "DataLength", "FooterLength", "NumberOfRows", }); internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_descriptor = - getDescriptor().getMessageTypes().get(16); + getDescriptor().getMessageTypes().get(18); internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_descriptor, new java.lang.String[] { "Name", "Value", }); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeStatistics_descriptor = - getDescriptor().getMessageTypes().get(17); + getDescriptor().getMessageTypes().get(19); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeStatistics_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_StripeStatistics_descriptor, new java.lang.String[] { "ColStats", }); internal_static_org_apache_hadoop_hive_ql_io_orc_Metadata_descriptor = - getDescriptor().getMessageTypes().get(18); + getDescriptor().getMessageTypes().get(20); internal_static_org_apache_hadoop_hive_ql_io_orc_Metadata_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Metadata_descriptor, new java.lang.String[] { "StripeStats", }); internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_descriptor = - getDescriptor().getMessageTypes().get(19); + getDescriptor().getMessageTypes().get(21); internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_descriptor, new java.lang.String[] { "HeaderLength", "ContentLength", "Stripes", "Types", "Metadata", "NumberOfRows", "Statistics", "RowIndexStride", }); internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor = - getDescriptor().getMessageTypes().get(20); + getDescriptor().getMessageTypes().get(22); internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor, diff --git ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilter.java new file mode 100644 index 0000000..2d4fd13 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilter.java @@ -0,0 +1,298 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.filters; + +import static com.google.common.base.Preconditions.checkArgument; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.io.orc.OrcProto; + +import com.google.common.primitives.Longs; + +/** + * BloomFilter is a probabilistic data structure for set membership check. BloomFilters are + * highly space efficient when compared to using a HashSet. Because of the probabilistic nature of + * bloom filter false positive (element not present in bloom filter but test() says true) are + * possible but false negatives are not possible (if element is present then test() will never + * say false). The false positive probability is configurable (default: 5%) depending on which + * storage requirement may increase or decrease. Lower the false positive probability greater + * is the space requirement. + * Bloom filters are sensitive to number of elements that will be inserted in the bloom filter. + * During the creation of bloom filter expected number of entries must be specified. If the number + * of insertions exceed the specified initial number of entries then false positive probability will + * increase accordingly. + * + * Internally, this implementation of bloom filter uses Murmur3 fast non-cryptographic hash + * algorithm. Although Murmur2 is slightly faster than Murmur3 in Java, it suffers from hash + * collisions for specific sequence of repeating bytes. 
+ * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw
+ */
+public class BloomFilter {
+  public static final double DEFAULT_FPP = 0.05;
+  private BitSet bitSet;
+  private int m;
+  private int k;
+
+  public BloomFilter(long expectedEntries) {
+    this(expectedEntries, DEFAULT_FPP);
+  }
+
+  public BloomFilter(long expectedEntries, double fpp) {
+    checkArgument(expectedEntries > 0, "expectedEntries should be > 0");
+    checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");
+    int nb = optimalNumOfBits(expectedEntries, fpp);
+    // make 'm' a multiple of 64
+    this.m = nb + (Long.SIZE - (nb % Long.SIZE));
+    this.k = optimalNumOfHashFunctions(expectedEntries, m);
+    this.bitSet = new BitSet(m);
+  }
+
+  public BloomFilter(OrcProto.BloomFilter bloomFilter) {
+    this.bitSet = new BitSet(Longs.toArray(bloomFilter.getBitsetList()));
+    this.k = bloomFilter.getNumHashFunctions();
+    this.m = (int) this.bitSet.bitSize();
+  }
+
+  static int optimalNumOfHashFunctions(long n, long m) {
+    return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
+  }
+
+  static int optimalNumOfBits(long n, double p) {
+    return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
+  }
+
+  public void add(byte[] val) {
+    if (val == null) {
+      addBytes(val, -1);
+    } else {
+      addBytes(val, val.length);
+    }
+  }
+
+  public void addBytes(byte[] val, int length) {
+    // We use the trick described in "Less Hashing, Same Performance: Building a Better Bloom
+    // Filter" by Kirsch et al. From the abstract: 'only two hash functions are necessary to
+    // effectively implement a Bloom filter without any loss in the asymptotic false positive
+    // probability'
+
+    // Let's split up the 64-bit hashcode into two 32-bit hash codes and employ the technique
+    // mentioned in the above paper
+    long hash64 = val == null ? Murmur3.NULL_HASHCODE : Murmur3.hash64(val, length);
+    addHash(hash64);
+  }
+
+  private void addHash(long hash64) {
+    int hash1 = (int) hash64;
+    int hash2 = (int) (hash64 >>> 32);
+
+    for (int i = 1; i <= k; i++) {
+      int combinedHash = hash1 + (i * hash2);
+      // hashcode should be positive, flip all the bits if it's negative
+      if (combinedHash < 0) {
+        combinedHash = ~combinedHash;
+      }
+      int pos = combinedHash % m;
+      bitSet.set(pos);
+    }
+  }
+
+  public void addString(String val) {
+    if (val == null) {
+      add(null);
+    } else {
+      add(val.getBytes());
+    }
+  }
+
+  public void addLong(long val) {
+    addHash(getLongHash(val));
+  }
+
+  public void addDouble(double val) {
+    addLong(Double.doubleToLongBits(val));
+  }
+
+  public boolean test(byte[] val) {
+    if (val == null) {
+      return testBytes(val, -1);
+    }
+    return testBytes(val, val.length);
+  }
+
+  public boolean testBytes(byte[] val, int length) {
+    long hash64 = val == null ?
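    // Editorial sketch (not part of the patch): testBytes() mirrors addBytes() above, relying on
    // the Kirsch-Mitzenmacher construction used by addHash()/testHash(): all k probe positions
    // come from one 64-bit Murmur3 hash, g_i(x) = h1(x) + i * h2(x) for i = 1..k, where h1 is the
    // low and h2 the high 32 bits. Worked example with k = 3, m = 64 and
    // hash64 = 0x0000000500000007L: h1 = 7, h2 = 5, so the probed bits are
    // (7 + 5) % 64 = 12, (7 + 10) % 64 = 17, and (7 + 15) % 64 = 22.
    // Sizing at the defaults: expectedEntries = 10,000 and fpp = 0.05 give
    // optimalNumOfBits() = 62,352, rounded up to m = 62,400 (a multiple of 64), and k = 4.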
Murmur3.NULL_HASHCODE : Murmur3.hash64(val, length); + return testHash(hash64); + } + + private boolean testHash(long hash64) { + int hash1 = (int) hash64; + int hash2 = (int) (hash64 >>> 32); + + for (int i = 1; i <= k; i++) { + int combinedHash = hash1 + (i * hash2); + // hashcode should be positive, flip all the bits if it's negative + if (combinedHash < 0) { + combinedHash = ~combinedHash; + } + int pos = combinedHash % m; + if (!bitSet.get(pos)) { + return false; + } + } + return true; + } + + public boolean testString(String val) { + if (val == null) { + return test(null); + } else { + return test(val.getBytes()); + } + } + + public boolean testLong(long val) { + return testHash(getLongHash(val)); + } + + // Thomas Wang's integer hash function + // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm + private long getLongHash(long key) { + key = (~key) + (key << 21); // key = (key << 21) - key - 1; + key = key ^ (key >> 24); + key = (key + (key << 3)) + (key << 8); // key * 265 + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4); // key * 21 + key = key ^ (key >> 28); + key = key + (key << 31); + return key; + } + + public boolean testDouble(double val) { + return testLong(Double.doubleToLongBits(val)); + } + + public long sizeInBytes() { + return getBitSize() / 8; + } + + public int getBitSize() { + return bitSet.getData().length * Long.SIZE; + } + + public int getNumHashFunctions() { + return k; + } + + public long[] getBitSet() { + return bitSet.getData(); + } + + @Override + public String toString() { + return "m: " + m + " k: " + k; + } + + /** + * Merge the specified bloom filter with current bloom filter. + * + * @param that - bloom filter to merge + */ + public void merge(BloomFilter that) { + if (this != that && this.m == that.m && this.k == that.k) { + this.bitSet.putAll(that.bitSet); + } else { + throw new IllegalArgumentException("BloomFilters are not compatible for merging." + + " this - " + this.toString() + " that - " + that.toString()); + } + } + + public void reset() { + this.bitSet.clear(); + } + + /** + * Bare metal bit set implementation. For performance reasons, this implementation does not check + * for index bounds nor expand the bit set size if the specified index is greater than the size. + */ + private class BitSet { + final long[] data; + + BitSet(long bits) { + this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]); + } + + /** + * Deserialize long array as bit set. + * + * @param data - bit array + */ + BitSet(long[] data) { + assert data.length > 0 : "data length is zero!"; + this.data = data; + } + + /** + * Sets the bit at specified index. + * + * @param index - position + */ + void set(long index) { + data[(int) (index >>> 6)] |= (1L << index); + } + + /** + * Returns true if the bit is set in the specified index. + * + * @param index - position + * @return - value at the bit position + */ + boolean get(long index) { + return (data[(int) (index >>> 6)] & (1L << index)) != 0; + } + + /** + * Number of bits + */ + long bitSize() { + return (long) data.length * Long.SIZE; + } + + long[] getData() { + return data; + } + + /** + * Combines the two BitArrays using bitwise OR. + */ + void putAll(BitSet array) { + assert data.length == array.data.length : + "BitArrays must be of equal length (" + data.length + "!= " + array.data.length + ")"; + for (int i = 0; i < data.length; i++) { + data[i] |= array.data[i]; + } + } + + /** + * Clear the bit set. 
+     */
+    public void clear() {
+      Arrays.fill(data, 0);
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/filters/Murmur3.java ql/src/java/org/apache/hadoop/hive/ql/io/filters/Murmur3.java
new file mode 100644
index 0000000..e733892
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/filters/Murmur3.java
@@ -0,0 +1,334 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.filters;
+
+/**
+ * Murmur3 is the successor to the Murmur2 family of fast non-cryptographic hash algorithms.
+ *
+ * Murmur3 32 and 128 bit variants.
+ * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94
+ * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
+ *
+ * This is public domain code with no copyrights.
+ * From the homepage of MurmurHash (https://code.google.com/p/smhasher/),
+ * "All MurmurHash versions are public domain software, and the author disclaims all copyright
+ * to their code."
+ */
+public class Murmur3 {
+  // from 64-bit linear congruential generator
+  public static final long NULL_HASHCODE = 2862933555777941757L;
+
+  // Constants for the 32-bit variant
+  private static final int C1_32 = 0xcc9e2d51;
+  private static final int C2_32 = 0x1b873593;
+  private static final int R1_32 = 15;
+  private static final int R2_32 = 13;
+  private static final int M_32 = 5;
+  private static final int N_32 = 0xe6546b64;
+
+  // Constants for the 128-bit variant
+  private static final long C1 = 0x87c37b91114253d5L;
+  private static final long C2 = 0x4cf5ad432745937fL;
+  private static final int R1 = 31;
+  private static final int R2 = 27;
+  private static final int R3 = 33;
+  private static final int M = 5;
+  private static final int N1 = 0x52dce729;
+  private static final int N2 = 0x38495ab5;
+
+  private static final int DEFAULT_SEED = 104729;
+
+  /**
+   * Murmur3 32-bit variant.
+   *
+   * @param data - input byte array
+   * @return - hashcode
+   */
+  public static int hash32(byte[] data) {
+    return hash32(data, data.length, DEFAULT_SEED);
+  }
+
+  /**
+   * Murmur3 32-bit variant.
+   *
+   * @param data - input byte array
+   * @param length - length of array
+   * @param seed - seed.
(default 0) + * @return - hashcode + */ + public static int hash32(byte[] data, int length, int seed) { + int hash = seed; + final int nblocks = length >> 2; + + // body + for (int i = 0; i < nblocks; i++) { + int i_4 = i << 2; + int k = (data[i_4] & 0xff) + | ((data[i_4 + 1] & 0xff) << 8) + | ((data[i_4 + 2] & 0xff) << 16) + | ((data[i_4 + 3] & 0xff) << 24); + + // mix functions + k *= C1_32; + k = Integer.rotateLeft(k, R1_32); + k *= C2_32; + hash ^= k; + hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + } + + // tail + int idx = nblocks << 2; + int k1 = 0; + switch (length - idx) { + case 3: + k1 ^= data[idx + 2] << 16; + case 2: + k1 ^= data[idx + 1] << 8; + case 1: + k1 ^= data[idx]; + + // mix functions + k1 *= C1_32; + k1 = Integer.rotateLeft(k1, R1_32); + k1 *= C2_32; + hash ^= k1; + } + + // finalization + hash ^= length; + hash ^= (hash >>> 16); + hash *= 0x85ebca6b; + hash ^= (hash >>> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >>> 16); + + return hash; + } + + /** + * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. + * + * @param data - input byte array + * @return - hashcode + */ + public static long hash64(byte[] data) { + return hash64(data, data.length, DEFAULT_SEED); + } + + public static long hash64(byte[] data, int length) { + return hash64(data, length, DEFAULT_SEED); + } + + /** + * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. + * + * @param data - input byte array + * @param length - length of array + * @param seed - seed. (default is 0) + * @return - hashcode + */ + public static long hash64(byte[] data, int length, int seed) { + long hash = seed; + final int nblocks = length >> 3; + + // body + for (int i = 0; i < nblocks; i++) { + final int i8 = i << 3; + long k = ((long) data[i8] & 0xff) + | (((long) data[i8 + 1] & 0xff) << 8) + | (((long) data[i8 + 2] & 0xff) << 16) + | (((long) data[i8 + 3] & 0xff) << 24) + | (((long) data[i8 + 4] & 0xff) << 32) + | (((long) data[i8 + 5] & 0xff) << 40) + | (((long) data[i8 + 6] & 0xff) << 48) + | (((long) data[i8 + 7] & 0xff) << 56); + + // mix functions + k *= C1; + k = Long.rotateLeft(k, R1); + k *= C2; + hash ^= k; + hash = Long.rotateLeft(hash, R2) * M + N1; + } + + // tail + long k1 = 0; + int tailStart = nblocks << 3; + switch (length - tailStart) { + case 7: + k1 ^= ((long) data[tailStart + 6] & 0xff) << 48; + case 6: + k1 ^= ((long) data[tailStart + 5] & 0xff) << 40; + case 5: + k1 ^= ((long) data[tailStart + 4] & 0xff) << 32; + case 4: + k1 ^= ((long) data[tailStart + 3] & 0xff) << 24; + case 3: + k1 ^= ((long) data[tailStart + 2] & 0xff) << 16; + case 2: + k1 ^= ((long) data[tailStart + 1] & 0xff) << 8; + case 1: + k1 ^= ((long) data[tailStart] & 0xff); + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + hash ^= k1; + } + + // finalization + hash ^= length; + hash = fmix64(hash); + + return hash; + } + + /** + * Murmur3 128-bit variant. + * + * @param data - input byte array + * @return - hashcode (2 longs) + */ + public static long[] hash128(byte[] data) { + return hash128(data, data.length, DEFAULT_SEED); + } + + /** + * Murmur3 128-bit variant. + * + * @param data - input byte array + * @param length - length of array + * @param seed - seed. 
(default is 0) + * @return - hashcode (2 longs) + */ + public static long[] hash128(byte[] data, int length, int seed) { + long h1 = seed; + long h2 = seed; + final int nblocks = length >> 4; + + // body + for (int i = 0; i < nblocks; i++) { + final int i16 = i << 4; + long k1 = ((long) data[i16] & 0xff) + | (((long) data[i16 + 1] & 0xff) << 8) + | (((long) data[i16 + 2] & 0xff) << 16) + | (((long) data[i16 + 3] & 0xff) << 24) + | (((long) data[i16 + 4] & 0xff) << 32) + | (((long) data[i16 + 5] & 0xff) << 40) + | (((long) data[i16 + 6] & 0xff) << 48) + | (((long) data[i16 + 7] & 0xff) << 56); + + long k2 = ((long) data[i16 + 8] & 0xff) + | (((long) data[i16 + 9] & 0xff) << 8) + | (((long) data[i16 + 10] & 0xff) << 16) + | (((long) data[i16 + 11] & 0xff) << 24) + | (((long) data[i16 + 12] & 0xff) << 32) + | (((long) data[i16 + 13] & 0xff) << 40) + | (((long) data[i16 + 14] & 0xff) << 48) + | (((long) data[i16 + 15] & 0xff) << 56); + + // mix functions for k1 + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + h1 = Long.rotateLeft(h1, R2); + h1 += h2; + h1 = h1 * M + N1; + + // mix functions for k2 + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + h2 = Long.rotateLeft(h2, R1); + h2 += h1; + h2 = h2 * M + N2; + } + + // tail + long k1 = 0; + long k2 = 0; + int tailStart = nblocks << 4; + switch (length - tailStart) { + case 15: + k2 ^= (long) (data[tailStart + 14] & 0xff) << 48; + case 14: + k2 ^= (long) (data[tailStart + 13] & 0xff) << 40; + case 13: + k2 ^= (long) (data[tailStart + 12] & 0xff) << 32; + case 12: + k2 ^= (long) (data[tailStart + 11] & 0xff) << 24; + case 11: + k2 ^= (long) (data[tailStart + 10] & 0xff) << 16; + case 10: + k2 ^= (long) (data[tailStart + 9] & 0xff) << 8; + case 9: + k2 ^= (long) (data[tailStart + 8] & 0xff); + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + + case 8: + k1 ^= (long) (data[tailStart + 7] & 0xff) << 56; + case 7: + k1 ^= (long) (data[tailStart + 6] & 0xff) << 48; + case 6: + k1 ^= (long) (data[tailStart + 5] & 0xff) << 40; + case 5: + k1 ^= (long) (data[tailStart + 4] & 0xff) << 32; + case 4: + k1 ^= (long) (data[tailStart + 3] & 0xff) << 24; + case 3: + k1 ^= (long) (data[tailStart + 2] & 0xff) << 16; + case 2: + k1 ^= (long) (data[tailStart + 1] & 0xff) << 8; + case 1: + k1 ^= (long) (data[tailStart] & 0xff); + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + } + + // finalization + h1 ^= length; + h2 ^= length; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + return new long[]{h1, h2}; + } + + private static long fmix64(long h) { + h ^= (h >>> 33); + h *= 0xff51afd7ed558ccdL; + h ^= (h >>> 33); + h *= 0xc4ceb9fe1a85ec53L; + h ^= (h >>> 33); + return h; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java index 79279ea..889a69d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java @@ -17,6 +17,14 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -25,6 +33,7 @@ import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.filters.BloomFilter; import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex; import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry; import org.apache.hadoop.hive.serde2.io.ByteWritable; @@ -37,14 +46,6 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - /** * A tool for printing out the file structure of ORC files. */ @@ -114,6 +115,7 @@ private static void printMetaData(List files, Configuration conf, } } ColumnStatistics[] stats = reader.getStatistics(); + int colCount = stats.length; System.out.println("\nFile Statistics:"); for(int i=0; i < stats.length; ++i) { System.out.println(" Column " + i + ": " + stats[i].toString()); @@ -147,38 +149,20 @@ private static void printMetaData(List files, Configuration conf, } System.out.println(buf); } - if (rowIndexCols != null) { - RowIndex[] indices = rows.readRowIndex(stripeIx); + if (rowIndexCols != null && !rowIndexCols.isEmpty()) { + // include the columns that are specified, only if the columns are included, bloom filter + // will be read + boolean[] sargColumns = new boolean[colCount]; + for (int colIdx : rowIndexCols) { + sargColumns[colIdx] = true; + } + RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, sargColumns); for (int col : rowIndexCols) { StringBuilder buf = new StringBuilder(); - buf.append(" Row group index column ").append(col).append(":"); - RowIndex index = null; - if ((col >= indices.length) || ((index = indices[col]) == null)) { - buf.append(" not found\n"); - continue; - } - for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) { - buf.append("\n Entry ").append(entryIx).append(":"); - RowIndexEntry entry = index.getEntry(entryIx); - if (entry == null) { - buf.append("unknown\n"); - continue; - } - OrcProto.ColumnStatistics colStats = entry.getStatistics(); - if (colStats == null) { - buf.append("no stats at "); - } else { - ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats); - buf.append(cs.toString()); - } - buf.append(" positions: "); - for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) { - if (posIx != 0) { - buf.append(","); - } - buf.append(entry.getPositions(posIx)); - } - } + String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex()); + buf.append(rowIdxString); + String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex()); + buf.append(bloomFilString); System.out.println(buf); } } @@ -197,6 +181,82 @@ private static void printMetaData(List files, Configuration conf, } } + private static String getFormattedBloomFilters(int col, + OrcProto.BloomFilterIndex[] bloomFilterIndex) { + StringBuilder buf = new StringBuilder(); + BloomFilter stripeLevelBF = null; + if (bloomFilterIndex != null && bloomFilterIndex[col] != null) { + int idx = 0; + buf.append("\n Bloom filters for column ").append(col).append(":"); + for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) { + BloomFilter toMerge = new BloomFilter(bf); + buf.append("\n Entry ").append(idx++).append(":").append(getBloomFilterStats(toMerge)); + if (stripeLevelBF == null) { + stripeLevelBF = toMerge; + } else { + stripeLevelBF.merge(toMerge); + } + } + String bloomFilterStats = 
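      // Editorial note (not part of the patch): this merge is valid because every per-row-group
      // filter of a column is created with the same expected entry count (the row index stride)
      // and the same FPP, hence identical m and k; BloomFilter.merge() then ORs the bitsets.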
getBloomFilterStats(stripeLevelBF); + buf.append("\n Stripe level merge:").append(bloomFilterStats); + } + return buf.toString(); + } + + private static String getBloomFilterStats(BloomFilter bf) { + StringBuilder sb = new StringBuilder(); + int bitCount = bf.getBitSize(); + int popCount = 0; + for (long l : bf.getBitSet()) { + popCount += Long.bitCount(l); + } + int k = bf.getNumHashFunctions(); + float loadFactor = (float) popCount / (float) bitCount; + float expectedFpp = (float) Math.pow(loadFactor, k); + DecimalFormat df = new DecimalFormat("###.####"); + sb.append(" numHashFunctions: ").append(k); + sb.append(" bitCount: ").append(bitCount); + sb.append(" popCount: ").append(popCount); + sb.append(" loadFactor: ").append(df.format(loadFactor)); + sb.append(" expectedFpp: ").append(expectedFpp); + return sb.toString(); + } + + private static String getFormattedRowIndices(int col, RowIndex[] rowGroupIndex) { + StringBuilder buf = new StringBuilder(); + RowIndex index; + buf.append(" Row group indices for column ").append(col).append(":"); + if (rowGroupIndex == null || (col >= rowGroupIndex.length) || + ((index = rowGroupIndex[col]) == null)) { + buf.append(" not found\n"); + return buf.toString(); + } + + for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) { + buf.append("\n Entry ").append(entryIx).append(": "); + RowIndexEntry entry = index.getEntry(entryIx); + if (entry == null) { + buf.append("unknown\n"); + continue; + } + OrcProto.ColumnStatistics colStats = entry.getStatistics(); + if (colStats == null) { + buf.append("no stats at "); + } else { + ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats); + buf.append(cs.toString()); + } + buf.append(" positions: "); + for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) { + if (posIx != 0) { + buf.append(","); + } + buf.append(entry.getPositions(posIx)); + } + } + return buf.toString(); + } + private static long getTotalPaddingSize(Reader reader) throws IOException { long paddedBytes = 0; List stripes = reader.getStripes(); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index b46937c..25bb15a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -18,16 +18,23 @@ package org.apache.hadoop.hive.ql.io.orc; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT; + import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.filters.BloomFilter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*; - /** * Contains factory methods to read or write ORC files. 
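 *
 * Editorial usage sketch, not part of the patch (assumes a Configuration conf, an output Path
 * path, and an ObjectInspector oi are in scope; the property names come from this patch):
 * <pre>
 *   OrcFile.WriterOptions opts = OrcFile.writerOptions(conf)
 *       .inspector(oi)
 *       .bloomFilterColumns("name,id")  // orc.bloom.filter.columns
 *       .bloomFilterFpp(0.01);          // orc.bloom.filter.fpp (default 0.05)
 *   Writer writer = OrcFile.createWriter(path, opts);
 * </pre>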
*/ @@ -148,7 +155,9 @@ private WriterVersion(int id) { ROW_INDEX_STRIDE("orc.row.index.stride"), ENABLE_INDEXES("orc.create.index"), BLOCK_PADDING("orc.block.padding"), - ENCODING_STRATEGY("orc.encoding.strategy"); + ENCODING_STRATEGY("orc.encoding.strategy"), + BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns"), + BLOOM_FILTER_FPP("orc.bloom.filter.fpp"); private final String propName; @@ -256,6 +265,8 @@ public static Reader createReader(Path path, private EncodingStrategy encodingStrategy; private CompressionStrategy compressionStrategy; private float paddingTolerance; + private String bloomFilterColumns; + private double bloomFilterFpp; WriterOptions(Configuration conf) { configuration = conf; @@ -288,9 +299,9 @@ public static Reader createReader(Path path, compressionStrategy = CompressionStrategy.valueOf(compString); } - paddingTolerance = - conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname, - HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal); + paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname, + HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal); + bloomFilterFpp = BloomFilter.DEFAULT_FPP; } /** @@ -367,6 +378,24 @@ public WriterOptions paddingTolerance(float value) { } /** + * Comma separated values of column names for which bloom filter is to be created. + */ + public WriterOptions bloomFilterColumns(String columns) { + bloomFilterColumns = columns; + return this; + } + + /** + * Specify the false positive probability for bloom filter. + * @param fpp - false positive probability + * @return + */ + public WriterOptions bloomFilterFpp(double fpp) { + bloomFilterFpp = fpp; + return this; + } + + /** * Sets the generic compression that is used to compress the data. 
*/ public WriterOptions compress(CompressionKind value) { @@ -438,8 +467,8 @@ public static Writer createWriter(Path path, opts.memoryManagerValue, opts.blockPaddingValue, opts.versionValue, opts.callback, opts.encodingStrategy, opts.compressionStrategy, - opts.paddingTolerance, - opts.blockSizeValue); + opts.paddingTolerance, opts.blockSizeValue, + opts.bloomFilterColumns, opts.bloomFilterFpp); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 8c44e3e..498ee14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -18,9 +18,18 @@ package org.apache.hadoop.hive.ql.io.orc; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -65,18 +74,9 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.StringUtils; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.TreeMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.util.concurrent.ThreadFactoryBuilder; /** * A MapReduce/Hive input format for ORC files. *

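Editorial note on the hunk below (not part of the patch): the new three-argument
evaluatePredicate accepts an optional bloom filter. Stripe-level statistics carry no bloom
filter index, so OrcInputFormat passes null and stripe pruning still relies on min/max
statistics alone, while row-group evaluation in RecordReaderImpl.pickRowGroups (further down)
additionally probes the per-entry bloom filter when the column has one. A minimal sketch of
the two call shapes, using names from this patch:

    // stripe level: no bloom filter exists at this granularity
    truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
    // row-group level: pass the bloom filter entry when the column index carries one
    leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred), bf);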
@@ -920,7 +920,7 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; - truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred)); + truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } else { // parition column case. diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java index 5bd3f0c..8625ff1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java @@ -17,12 +17,17 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Properties; + import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.io.RecordUpdater; +import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy; import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow; import org.apache.hadoop.hive.serde2.SerDeStats; @@ -41,11 +46,6 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.Progressable; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Properties; - /** * A Hive OutputFormat for ORC files. */ @@ -170,6 +170,16 @@ private String getSettingFromPropsFallingBackToConf(String key, Properties props options.encodingStrategy(EncodingStrategy.valueOf(propVal)); } + if ((propVal = getSettingFromPropsFallingBackToConf( + OrcFile.OrcTableProperties.BLOOM_FILTER_COLUMNS.getPropName(), props, conf)) != null) { + options.bloomFilterColumns(propVal); + } + + if ((propVal = getSettingFromPropsFallingBackToConf( + OrcFile.OrcTableProperties.BLOOM_FILTER_FPP.getPropName(), props, conf)) != null) { + options.bloomFilterFpp(Double.parseDouble(propVal)); + } + return options; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java new file mode 100644 index 0000000..ba59b35 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java @@ -0,0 +1,203 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+
+import com.google.common.collect.Lists;
+
+public class OrcUtils {
+  private static final Log LOG = LogFactory.getLog(OrcUtils.class);
+
+  /**
+   * Returns the selected columns as a boolean array, with true set for the specified column
+   * names. The result contains as many elements as the flattened column count.
+   * For example:
+   *   selectedColumns - a,b,c
+   *   allColumns - a,b,c,d
+   * If column c is a complex type, say a list, and the other columns are primitives, the
+   * result will be [false, true, true, true, true, false].
+   * Index 0 is the root element of the struct, which is false by default; indexes 1 and 2
+   * correspond to columns a and b; indexes 3 and 4 correspond to column c (after flattening,
+   * a list occupies 2 columns); and index 5 corresponds to column d.
+   *
+   * @param selectedColumns - comma separated list of selected column names
+   * @param allColumns - comma separated list of all column names
+   * @param inspector - object inspector
+   * @return - boolean array with true set for the specified column names
+   */
+  public static boolean[] includeColumns(String selectedColumns, String allColumns,
+      ObjectInspector inspector) {
+    int numFlattenedCols = getFlattenedColumnsCount(inspector);
+    boolean[] results = new boolean[numFlattenedCols];
+    if (selectedColumns != null && !selectedColumns.isEmpty()) {
+      includeColumnsImpl(results, selectedColumns, allColumns, inspector);
+    }
+    return results;
+  }
+
+  private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
+      String allColumns,
+      ObjectInspector inspector) {
+    Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
+    LOG.info("columnSpanMap: " + columnSpanMap);
+
+    String[] selCols = selectedColumns.split(",");
+    for (String sc : selCols) {
+      if (columnSpanMap.containsKey(sc)) {
+        List<Integer> colSpan = columnSpanMap.get(sc);
+        int start = colSpan.get(0);
+        int end = colSpan.get(1);
+        for (int i = start; i <= end; i++) {
+          includeColumns[i] = true;
+        }
+      }
+    }
+
+    LOG.info("includeColumns: " + Arrays.toString(includeColumns));
+  }
+
+  private static Map<String, List<Integer>> getColumnSpan(String allColumns,
+      ObjectInspector inspector) {
+    // map that contains the column span for each column. The column span is the number of
+    // columns required after flattening; for a given object inspector this map contains the
+    // start and end column ids (both inclusive) after flattening.
+    // EXAMPLE:
+    // schema: struct<a:int,b:string,c:map<string,int>>
+    // column span map for the above struct will be
+    // a => [1,1], b => [2,2], c => [3,5]
+    Map<String, List<Integer>> columnSpanMap = new HashMap<String, List<Integer>>();
+    if (allColumns != null) {
+      String[] columns = allColumns.split(",");
+      int startIdx = 0;
+      int endIdx = 0;
+      if (inspector instanceof StructObjectInspector) {
+        StructObjectInspector soi = (StructObjectInspector) inspector;
+        List<? extends StructField> fields = soi.getAllStructFieldRefs();
+        for (int i = 0; i < fields.size(); i++) {
+          StructField sf = fields.get(i);
+
+          // we get the type (category) from the object inspector but the column name from the
+          // argument, because Hive (FileSinkOperator) does not pass the actual column names;
+          // instead it passes the internal column names (_col1, _col2).
+          ObjectInspector sfOI = sf.getFieldObjectInspector();
+          String colName = columns[i];
+
+          startIdx = endIdx + 1;
+          switch (sfOI.getCategory()) {
+            case PRIMITIVE:
+              endIdx += 1;
+              break;
+            case STRUCT:
+              endIdx += 1;
+              StructObjectInspector structInsp = (StructObjectInspector) sfOI;
+              List<? extends StructField> structFields = structInsp.getAllStructFieldRefs();
+              for (int j = 0; j < structFields.size(); ++j) {
+                endIdx += getFlattenedColumnsCount(structFields.get(j).getFieldObjectInspector());
+              }
+              break;
+            case MAP:
+              endIdx += 1;
+              MapObjectInspector mapInsp = (MapObjectInspector) sfOI;
+              endIdx += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
+              endIdx += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
+              break;
+            case LIST:
+              endIdx += 1;
+              ListObjectInspector listInsp = (ListObjectInspector) sfOI;
+              endIdx += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
+              break;
+            case UNION:
+              endIdx += 1;
+              UnionObjectInspector unionInsp = (UnionObjectInspector) sfOI;
+              List<ObjectInspector> choices = unionInsp.getObjectInspectors();
+              for (int j = 0; j < choices.size(); ++j) {
+                endIdx += getFlattenedColumnsCount(choices.get(j));
+              }
+              break;
+            default:
+              throw new IllegalArgumentException("Bad category: " +
+                  inspector.getCategory());
+          }
+
+          columnSpanMap.put(colName, Lists.newArrayList(startIdx, endIdx));
+        }
+      }
+    }
+    return columnSpanMap;
+  }
+
+  /**
+   * Returns the number of columns after flattening complex types.
+ * + * @param inspector - object inspector + * @return + */ + public static int getFlattenedColumnsCount(ObjectInspector inspector) { + int numWriters = 0; + switch (inspector.getCategory()) { + case PRIMITIVE: + numWriters += 1; + break; + case STRUCT: + numWriters += 1; + StructObjectInspector structInsp = (StructObjectInspector) inspector; + List fields = structInsp.getAllStructFieldRefs(); + for (int i = 0; i < fields.size(); ++i) { + numWriters += getFlattenedColumnsCount(fields.get(i).getFieldObjectInspector()); + } + break; + case MAP: + numWriters += 1; + MapObjectInspector mapInsp = (MapObjectInspector) inspector; + numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector()); + numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector()); + break; + case LIST: + numWriters += 1; + ListObjectInspector listInsp = (ListObjectInspector) inspector; + numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector()); + break; + case UNION: + numWriters += 1; + UnionObjectInspector unionInsp = (UnionObjectInspector) inspector; + List choices = unionInsp.getObjectInspectors(); + for (int i = 0; i < choices.size(); ++i) { + numWriters += getFlattenedColumnsCount(choices.get(i)); + } + break; + default: + throw new IllegalArgumentException("Bad category: " + + inspector.getCategory()); + } + return numWriters; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index e3afa60..c05ae73 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -24,6 +24,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; +import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; @@ -51,6 +52,7 @@ import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.io.filters.BloomFilter; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -101,11 +103,14 @@ List bufferChunks = new ArrayList(0); private final TreeReader reader; private final OrcProto.RowIndex[] indexes; + private final OrcProto.BloomFilterIndex[] bloomFilterIndices; private final SearchArgument sarg; // the leaf predicates for the sarg private final List sargLeaves; // an array the same length as the sargLeaves that map them to column ids private final int[] filterColumns; + // same as the above array, but indices are set to true + private final boolean[] sargColumns; // an array about which row groups aren't skipped private boolean[] includedRowGroups = null; private final Configuration conf; @@ -113,6 +118,24 @@ private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool(); private final ZeroCopyReaderShim zcr; + public final static class Index { + OrcProto.RowIndex[] rowGroupIndex; + OrcProto.BloomFilterIndex[] bloomFilterIndex; + + Index(OrcProto.RowIndex[] rgIndex, OrcProto.BloomFilterIndex[] bfIndex) { + this.rowGroupIndex = rgIndex; + this.bloomFilterIndex = bfIndex; + } + + public OrcProto.RowIndex[] getRowGroupIndex() { + return rowGroupIndex; + } + + public OrcProto.BloomFilterIndex[] getBloomFilterIndex() { + 
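      // Editorial sketch (not part of the patch): callers consume the two indexes as a pair,
      // as FileDump does above:
      //   RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, sargColumns);
      //   OrcProto.RowIndex[] rowGroupIdx = indices.getRowGroupIndex();
      //   OrcProto.BloomFilterIndex[] bloomIdx = indices.getBloomFilterIndex();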
return bloomFilterIndex; + } + } + // this is an implementation copied from ElasticByteBufferPool in hadoop-2, // which lacks a clear()/clean() operation public final static class ByteBufferAllocatorPool implements ByteBufferPoolShim { @@ -251,9 +274,18 @@ static int findColumns(String[] columnNames, if (sarg != null) { sargLeaves = sarg.getLeaves(); filterColumns = mapSargColumns(sargLeaves, options.getColumnNames(), 0); + // included will not be null, row options will fill the array with trues if null + sargColumns = new boolean[included.length]; + for (int i : filterColumns) { + // filter columns may have -1 as index which could be partition column in SARG. + if (i > 0) { + sargColumns[i] = true; + } + } } else { sargLeaves = null; filterColumns = null; + sargColumns = null; } long rows = 0; long skippedRows = 0; @@ -285,6 +317,7 @@ static int findColumns(String[] columnNames, totalRowCount = rows; reader = createTreeReader(path, 0, types, included, conf); indexes = new OrcProto.RowIndex[types.size()]; + bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()]; rowIndexStride = strideRate; advanceToNextRow(0L); } @@ -2358,15 +2391,20 @@ static Object getMin(ColumnStatistics index) { * that is referenced in the predicate. * @param statsProto the statistics for the column mentioned in the predicate * @param predicate the leaf predicate we need to evaluation + * @param bloomFilter * @return the set of truth values that may be returned for the given * predicate. */ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto, - PredicateLeaf predicate) { + PredicateLeaf predicate, OrcProto.BloomFilter bloomFilter) { ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto); Object minValue = getMin(cs); Object maxValue = getMax(cs); - return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull()); + BloomFilter bf = null; + if (bloomFilter != null) { + bf = new BloomFilter(bloomFilter); + } + return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), bf); } /** @@ -2378,14 +2416,14 @@ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto, * predicate. 
 */
  static TruthValue evaluatePredicate(ColumnStatistics stats,
-      PredicateLeaf predicate) {
+      PredicateLeaf predicate, BloomFilter bloomFilter) {
    Object minValue = getMin(stats);
    Object maxValue = getMax(stats);
-    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
+    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter);
  }

  static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
-      Object max, boolean hasNull) {
+      Object max, boolean hasNull, BloomFilter bloomFilter) {
    // if we didn't have any values, everything must have been null
    if (min == null) {
      if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2411,20 +2449,30 @@
    switch (predicate.getOperator()) {
      case NULL_SAFE_EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (loc == Location.BEFORE || loc == Location.AFTER) {
-          return TruthValue.NO;
+        // if a bloom filter exists, probe it; otherwise fall back to min/max stats
+        if (bloomFilter == null) {
+          loc = compareToRange((Comparable) predObj, minValue, maxValue);
+          if (loc == Location.BEFORE || loc == Location.AFTER) {
+            return TruthValue.NO;
+          } else {
+            return TruthValue.YES_NO;
+          }
        } else {
-          return TruthValue.YES_NO;
+          return checkInBloomFilter(bloomFilter, predObj);
        }
      case EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (minValue.equals(maxValue) && loc == Location.MIN) {
-          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
-        } else if (loc == Location.BEFORE || loc == Location.AFTER) {
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        // if a bloom filter exists, probe it; otherwise fall back to min/max stats
+        if (bloomFilter == null) {
+          loc = compareToRange((Comparable) predObj, minValue, maxValue);
+          if (minValue.equals(maxValue) && loc == Location.MIN) {
+            return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+          } else if (loc == Location.BEFORE || loc == Location.AFTER) {
+            return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+          } else {
+            return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+          }
        } else {
-          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+          return checkInBloomFilter(bloomFilter, predObj);
        }
      case LESS_THAN:
        loc = compareToRange((Comparable) predObj, minValue, maxValue);
@@ -2450,9 +2498,16 @@
        // set
        for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
          predObj = getBaseObjectForComparison(arg, minValue);
-          loc = compareToRange((Comparable) predObj, minValue, maxValue);
-          if (loc == Location.MIN) {
-            return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+          if (bloomFilter == null) {
+            loc = compareToRange((Comparable) predObj, minValue, maxValue);
+            if (loc == Location.MIN) {
+              return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+            }
+          } else {
+            // if at least one value in the IN list exists in the bloom filter, qualify the
+            // row group/stripe
+            if (checkInBloomFilter(bloomFilter, predObj) != TruthValue.NO_NULL) {
+              return TruthValue.YES_NO_NULL;
+            }
          }
        }
        return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
@@ -2460,10 +2515,17 @@
        // are all of the values outside of the range?
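        // Editorial note (not part of the patch): a bloom filter probe can only prove absence,
        // so checkInBloomFilter() returns either NO_NULL ("definitely not present") or
        // YES_NO_NULL ("maybe present"), never a definite YES. For IN and BETWEEN the loops
        // here may therefore prune only when every literal probes as definitely absent.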
        for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
          predObj = getBaseObjectForComparison(arg, minValue);
-          loc = compareToRange((Comparable) predObj, minValue, maxValue);
-          if (loc == Location.MIN || loc == Location.MIDDLE ||
-              loc == Location.MAX) {
-            return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+          if (bloomFilter == null) {
+            loc = compareToRange((Comparable) predObj, minValue, maxValue);
+            if (loc == Location.MIN || loc == Location.MIDDLE ||
+                loc == Location.MAX) {
+              return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+            }
+          } else {
+            // if at least one value in the IN list exists in the bloom filter, qualify the
+            // row group/stripe
+            if (checkInBloomFilter(bloomFilter, predObj) != TruthValue.NO_NULL) {
+              return TruthValue.YES_NO_NULL;
+            }
          }
        }
        return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
@@ -2502,6 +2564,49 @@
      }
    }

+  private static TruthValue checkInBloomFilter(BloomFilter bf, Object predObj) {
+    TruthValue result = TruthValue.NO_NULL;
+
+    if (predObj instanceof Long) {
+      if (bf.testLong(((Long) predObj).longValue())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof Double) {
+      if (bf.testDouble(((Double) predObj).doubleValue())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof String || predObj instanceof Text ||
+        predObj instanceof HiveDecimal || predObj instanceof BigDecimal) {
+      if (bf.testString(predObj.toString())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof Date) {
+      if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof DateWritable) {
+      if (bf.testLong(((DateWritable) predObj).getDays())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof Timestamp) {
+      if (bf.testLong(((Timestamp) predObj).getTime())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof TimestampWritable) {
+      if (bf.testLong(((TimestampWritable) predObj).getTimestamp().getTime())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else {
+      // unknown predicate type: assume the value may be present
+      result = TruthValue.YES_NO_NULL;
+    }
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Bloom filter evaluation: " + result.toString());
+    }
+
+    return result;
+  }
+
  private static Object getBaseObjectForComparison(Object predObj, Object statsObj) {
    if (predObj != null) {
      if (predObj instanceof ExprNodeConstantDesc) {
@@ -2567,7 +2672,7 @@ private static Object getConvertedStatsObj(Object statsObj, Object predObj) {
    if (sarg == null || rowIndexStride == 0) {
      return null;
    }
-    readRowIndex(currentStripe);
+    readRowIndex(currentStripe, sargColumns);
    long rowsInStripe = stripes.get(currentStripe).getNumberOfRows();
    int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
@@ -2578,7 +2683,11 @@
        if (filterColumns[pred] != -1) {
          OrcProto.ColumnStatistics stats =
              indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics();
-          leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred));
+          OrcProto.BloomFilter bf = null;
+          if (bloomFilterIndices[filterColumns[pred]] != null) {
+            bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup);
+          }
+          leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred), bf);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Stats = " + stats);
            LOG.debug("Setting " + sargLeaves.get(pred) + " to " +
@@ -3221,7 +3330,7 @@ private int findStripe(long rowNumber) {
    throw new IllegalArgumentException("Seek after the end of reader range");
  }

-  OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
+  Index readRowIndex(int stripeIndex, boolean[] sargColumns) throws IOException {
    long offset = stripes.get(stripeIndex).getOffset();
    OrcProto.StripeFooter stripeFooter;
    OrcProto.RowIndex[] indexes;
@@ -3233,21 +3342,45 @@ private int findStripe(long rowNumber) {
      stripeFooter = readStripeFooter(stripes.get(stripeIndex));
      indexes = new OrcProto.RowIndex[this.indexes.length];
    }
-    for(OrcProto.Stream stream: stripeFooter.getStreamsList()) {
+    List<OrcProto.Stream> streams = stripeFooter.getStreamsList();
+    for (int i = 0; i < streams.size(); i++) {
+      OrcProto.Stream stream = streams.get(i);
+      OrcProto.Stream nextStream = null;
+      if (i < streams.size() - 1) {
+        nextStream = streams.get(i+1);
+      }
+      int col = stream.getColumn();
+      int len = (int) stream.getLength();
+      // the row index and bloom filter streams are interleaved; if the SARG column has a bloom
+      // filter, combine the IO to read the row index and bloom filter for that column together
      if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) {
-        int col = stream.getColumn();
+        boolean readBloomFilter = false;
+        // nextStream may be null when ROW_INDEX is the last stream in the footer
+        if (sargColumns != null && sargColumns[col] && nextStream != null &&
+            nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+          len += nextStream.getLength();
+          i += 1;
+          readBloomFilter = true;
+        }
        if ((included == null || included[col]) && indexes[col] == null) {
-          byte[] buffer = new byte[(int) stream.getLength()];
+          byte[] buffer = new byte[len];
          file.seek(offset);
          file.readFully(buffer);
+          ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
          indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
-              new ByteBuffer[] {ByteBuffer.wrap(buffer)}, new long[]{0},
-              stream.getLength(), codec, bufferSize));
+              bb, new long[]{0}, stream.getLength(), codec, bufferSize));
+          if (readBloomFilter) {
+            bb[0].position((int) stream.getLength());
+            bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(
+                InStream.create("bloom_filter", bb, new long[]{0}, nextStream.getLength(),
+                codec, bufferSize));
+          }
        }
      }
-      offset += stream.getLength();
+      offset += len;
    }
-    return indexes;
+
+    Index index = new Index(indexes, bloomFilterIndices);
+    return index;
  }

  private void seekToRowEntry(int rowEntry) throws IOException {
@@ -3279,7 +3412,7 @@ public void seekToRow(long rowNumber) throws IOException {
      currentStripe = rightStripe;
      readStripe();
    }
-    readRowIndex(currentStripe);
+    readRowIndex(currentStripe, sargColumns);

    // if we aren't to the right row yet, advance in the stripe.
advanceToNextRow(rowNumber); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java index 3d44954..6215791 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java @@ -75,6 +75,7 @@ public static Area getArea(OrcProto.Stream.Kind kind) { switch (kind) { case ROW_INDEX: case DICTIONARY_COUNT: + case BLOOM_FILTER: return Area.INDEX; default: return Area.DATA; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 159d3ab..f566369 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -20,10 +20,16 @@ import static com.google.common.base.Preconditions.checkArgument; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.protobuf.ByteString; -import com.google.protobuf.CodedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.lang.management.ManagementFactory; +import java.nio.ByteBuffer; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -34,6 +40,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.filters.BloomFilter; import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier; import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy; import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy; @@ -70,16 +77,12 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -import java.io.IOException; -import java.io.OutputStream; -import java.lang.management.ManagementFactory; -import java.nio.ByteBuffer; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; +import com.google.common.primitives.Longs; +import com.google.protobuf.ByteString; +import com.google.protobuf.CodedOutputStream; /** * An ORC file writer. 
The file is divided into stripes, which is the natural @@ -145,23 +148,27 @@ private final OrcFile.WriterContext callbackContext; private final OrcFile.EncodingStrategy encodingStrategy; private final OrcFile.CompressionStrategy compressionStrategy; + private final boolean[] bloomFilterColumns; + private final double bloomFilterFpp; WriterImpl(FileSystem fs, - Path path, - Configuration conf, - ObjectInspector inspector, - long stripeSize, - CompressionKind compress, - int bufferSize, - int rowIndexStride, - MemoryManager memoryManager, - boolean addBlockPadding, - OrcFile.Version version, - OrcFile.WriterCallback callback, - OrcFile.EncodingStrategy encodingStrategy, - CompressionStrategy compressionStrategy, - float paddingTolerance, - long blockSizeValue) throws IOException { + Path path, + Configuration conf, + ObjectInspector inspector, + long stripeSize, + CompressionKind compress, + int bufferSize, + int rowIndexStride, + MemoryManager memoryManager, + boolean addBlockPadding, + OrcFile.Version version, + OrcFile.WriterCallback callback, + EncodingStrategy encodingStrategy, + CompressionStrategy compressionStrategy, + float paddingTolerance, + long blockSizeValue, + String bloomFilterColumnNames, + double bloomFilterFpp) throws IOException { this.fs = fs; this.path = path; this.conf = conf; @@ -190,7 +197,13 @@ public Writer getWriter() { this.memoryManager = memoryManager; buildIndex = rowIndexStride > 0; codec = createCodec(compress); - this.bufferSize = getEstimatedBufferSize(bufferSize); + String allColumns = conf.get(IOConstants.COLUMNS); + if (allColumns == null) { + allColumns = getColumnNamesFromInspector(inspector); + } + this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize); + this.bloomFilterColumns = OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector); + this.bloomFilterFpp = bloomFilterFpp; treeWriter = createTreeWriter(inspector, streamFactory, false); if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) { throw new IllegalArgumentException("Row stride must be at least " + @@ -201,8 +214,25 @@ public Writer getWriter() { memoryManager.addWriter(path, stripeSize, this); } + private String getColumnNamesFromInspector(ObjectInspector inspector) { + List fieldNames = Lists.newArrayList(); + Joiner joiner = Joiner.on(","); + if (inspector instanceof StructObjectInspector) { + StructObjectInspector soi = (StructObjectInspector) inspector; + List fields = soi.getAllStructFieldRefs(); + for(StructField sf : fields) { + fieldNames.add(sf.getFieldName()); + } + } + return joiner.join(fieldNames); + } + + @VisibleForTesting int getEstimatedBufferSize(int bs) { - String colNames = conf.get(IOConstants.COLUMNS); + return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs); + } + + int getEstimatedBufferSize(String colNames, int bs) { long availableMem = getMemoryAvailableForORC(); if (colNames != null) { final int numCols = colNames.split(",").length; @@ -459,26 +489,27 @@ public OutStream createStream(int column, final EnumSet modifiers; switch (kind) { - case DATA: - case DICTIONARY_DATA: - if (getCompressionStrategy() == CompressionStrategy.SPEED) { - modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT); - } else { - modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT); - } - break; - case LENGTH: - case DICTIONARY_COUNT: - case PRESENT: - case ROW_INDEX: - case SECONDARY: - // easily compressed using the fastest modes - modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY); - break; - default: - LOG.warn("Missing ORC compression 
modifiers for " + kind); - modifiers = null; - break; + case BLOOM_FILTER: + case DATA: + case DICTIONARY_DATA: + if (getCompressionStrategy() == CompressionStrategy.SPEED) { + modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT); + } else { + modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT); + } + break; + case LENGTH: + case DICTIONARY_COUNT: + case PRESENT: + case ROW_INDEX: + case SECONDARY: + // easily compressed using the fastest modes + modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY); + break; + default: + LOG.warn("Missing ORC compression modifiers for " + kind); + modifiers = null; + break; } BufferedStream result = streams.get(name); @@ -499,6 +530,15 @@ public int getNextColumnId() { } /** + * Get the current column id. After creating all tree writers this count should tell how many + * columns (including columns within nested complex objects) are created in total. + * @return current column id + */ + public int getCurrentColumnId() { + return columnCount; + } + + /** * Get the stride rate of the row index. */ public int getRowIndexStride() { @@ -538,6 +578,22 @@ public CompressionStrategy getCompressionStrategy() { } /** + * Get the bloom filter columns + * @return bloom filter columns + */ + public boolean[] getBloomFilterColumns() { + return bloomFilterColumns; + } + + /** + * Get bloom filter false positive percentage. + * @return fpp + */ + public double getBloomFilterFPP() { + return bloomFilterFpp; + } + + /** * Get the writer's configuration. * @return configuration */ @@ -572,6 +628,11 @@ public Configuration getConfiguration() { private final OrcProto.RowIndex.Builder rowIndex; private final OrcProto.RowIndexEntry.Builder rowIndexEntry; private final PositionedOutputStream rowIndexStream; + private final PositionedOutputStream bloomFilterStream; + protected final BloomFilter bloomFilter; + protected final boolean createBloomFilter; + private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex; + private final OrcProto.BloomFilter.Builder bloomFilterEntry; private boolean foundNulls; private OutStream isPresentOutStream; private final List stripeStatsBuilders; @@ -598,6 +659,7 @@ public Configuration getConfiguration() { isPresent = null; } this.foundNulls = false; + createBloomFilter = streamFactory.getBloomFilterColumns()[columnId]; indexStatistics = ColumnStatisticsImpl.create(inspector); stripeColStatistics = ColumnStatisticsImpl.create(inspector); fileStatistics = ColumnStatisticsImpl.create(inspector); @@ -607,11 +669,22 @@ public Configuration getConfiguration() { rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry); stripeStatsBuilders = Lists.newArrayList(); if (streamFactory.buildIndex()) { - rowIndexStream = streamFactory.createStream(id, - OrcProto.Stream.Kind.ROW_INDEX); + rowIndexStream = streamFactory.createStream(id, OrcProto.Stream.Kind.ROW_INDEX); } else { rowIndexStream = null; } + if (createBloomFilter) { + bloomFilterEntry = OrcProto.BloomFilter.newBuilder(); + bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder(); + bloomFilterStream = streamFactory.createStream(id, OrcProto.Stream.Kind.BLOOM_FILTER); + bloomFilter = new BloomFilter(streamFactory.getRowIndexStride(), + streamFactory.getBloomFilterFPP()); + } else { + bloomFilterEntry = null; + bloomFilterIndex = null; + bloomFilterStream = null; + bloomFilter = null; + } } protected OrcProto.RowIndex.Builder getRowIndex() { @@ -725,6 +798,14 @@ void writeStripe(OrcProto.StripeFooter.Builder builder, } rowIndex.clear(); rowIndexEntry.clear(); + + // write the 
bloom filter to out stream + if (bloomFilterStream != null) { + bloomFilterIndex.build().writeTo(bloomFilterStream); + bloomFilterStream.flush(); + bloomFilterIndex.clear(); + bloomFilterEntry.clear(); + } } private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder, @@ -763,12 +844,23 @@ void createRowIndexEntry() throws IOException { indexStatistics.reset(); rowIndex.addEntry(rowIndexEntry); rowIndexEntry.clear(); + addBloomFilterEntry(); recordPosition(rowIndexPosition); for(TreeWriter child: childrenWriters) { child.createRowIndexEntry(); } } + void addBloomFilterEntry() { + if (createBloomFilter) { + bloomFilterEntry.setNumHashFunctions(bloomFilter.getNumHashFunctions()); + bloomFilterEntry.addAllBitset(Longs.asList(bloomFilter.getBitSet())); + bloomFilterIndex.addBloomFilter(bloomFilterEntry.build()); + bloomFilter.reset(); + bloomFilterEntry.clear(); + } + } + /** * Record the current position in each of this column's streams. * @param recorder where should the locations be recorded @@ -851,6 +943,9 @@ void write(Object obj) throws IOException { if (obj != null) { byte val = ((ByteObjectInspector) inspector).get(obj); indexStatistics.updateInteger(val); + if (createBloomFilter) { + bloomFilter.addLong(val); + } writer.write(val); } } @@ -926,6 +1021,10 @@ void write(Object obj) throws IOException { val = shortInspector.get(obj); } indexStatistics.updateInteger(val); + if (createBloomFilter) { + // integers are converted to longs in column statistics and during SARG evaluation + bloomFilter.addLong(val); + } writer.write(val); } } @@ -966,6 +1065,10 @@ void write(Object obj) throws IOException { if (obj != null) { float val = ((FloatObjectInspector) inspector).get(obj); indexStatistics.updateDouble(val); + if (createBloomFilter) { + // floats are converted to doubles in column statistics and during SARG evaluation + bloomFilter.addDouble(val); + } utils.writeFloat(stream, val); } } @@ -1006,6 +1109,9 @@ void write(Object obj) throws IOException { if (obj != null) { double val = ((DoubleObjectInspector) inspector).get(obj); indexStatistics.updateDouble(val); + if (createBloomFilter) { + bloomFilter.addDouble(val); + } utils.writeDouble(stream, val); } } @@ -1099,6 +1205,9 @@ void write(Object obj) throws IOException { directLengthOutput.write(val.getLength()); } indexStatistics.updateString(val); + if (createBloomFilter) { + bloomFilter.addBytes(val.getBytes(), val.getLength()); + } } } @@ -1250,6 +1359,7 @@ void createRowIndexEntry() throws IOException { OrcProto.RowIndexEntry base = rowIndexEntry.build(); savedRowIndex.add(base); rowIndexEntry.clear(); + addBloomFilterEntry(); recordPosition(rowIndexPosition); rowIndexValueCount.add(Long.valueOf(rows.size())); if (strideDictionaryCheck) { @@ -1360,6 +1470,9 @@ void write(Object obj) throws IOException { stream.write(val.getBytes(), 0, val.getLength()); length.write(val.getLength()); indexStatistics.updateBinary(val); + if (createBloomFilter) { + bloomFilter.addBytes(val.getBytes(), val.getLength()); + } } } @@ -1422,6 +1535,9 @@ void write(Object obj) throws IOException { indexStatistics.updateTimestamp(val); seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP); nanos.write(formatNanos(val.getNanos())); + if (createBloomFilter) { + bloomFilter.addLong(val.getTime()); + } } } @@ -1482,6 +1598,9 @@ void write(Object obj) throws IOException { DateWritable val = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj); indexStatistics.updateDate(val); writer.write(val.getDays()); + if 
(createBloomFilter) { + bloomFilter.addLong(val.getDays()); + } } } @@ -1550,6 +1669,9 @@ void write(Object obj) throws IOException { decimal.unscaledValue()); scaleStream.write(decimal.scale()); indexStatistics.updateDecimal(decimal); + if (createBloomFilter) { + bloomFilter.addString(decimal.toString()); + } } } @@ -1649,6 +1771,9 @@ void write(Object obj) throws IOException { ListObjectInspector insp = (ListObjectInspector) inspector; int len = insp.getListLength(obj); lengths.write(len); + if (createBloomFilter) { + bloomFilter.addLong(len); + } for(int i=0; i < len; ++i) { childrenWriters[0].write(insp.getListElement(obj, i)); } @@ -1713,6 +1838,9 @@ void write(Object obj) throws IOException { // accessor in the MapObjectInspector. Map valueMap = insp.getMap(obj); lengths.write(valueMap.size()); + if (createBloomFilter) { + bloomFilter.addLong(valueMap.size()); + } for(Map.Entry entry: valueMap.entrySet()) { childrenWriters[0].write(entry.getKey()); childrenWriters[1].write(entry.getValue()); @@ -1765,6 +1893,9 @@ void write(Object obj) throws IOException { UnionObjectInspector insp = (UnionObjectInspector) inspector; byte tag = insp.getTag(obj); tags.write(tag); + if (createBloomFilter) { + bloomFilter.addLong(tag); + } childrenWriters[tag].write(insp.getField(obj)); } } diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto index 98459fb..14a32e8 100644 --- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto +++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto @@ -86,6 +86,15 @@ message RowIndex { repeated RowIndexEntry entry = 1; } +message BloomFilter { + optional uint32 numHashFunctions = 1; + repeated fixed64 bitset = 2; +} + +message BloomFilterIndex { + repeated BloomFilter bloomFilter = 1; +} + message Stream { // if you add new index stream kinds, you need to make sure to update // StreamName to ensure it is added to the stripe in the right area @@ -97,6 +106,7 @@ message Stream { DICTIONARY_COUNT = 4; SECONDARY = 5; ROW_INDEX = 6; + BLOOM_FILTER = 7; } required Kind kind = 1; optional uint32 column = 2; diff --git ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java new file mode 100644 index 0000000..32b95ab --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java @@ -0,0 +1,458 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.io.filters; + +import static org.junit.Assert.assertEquals; + +import java.util.Random; + +import org.junit.Test; + +/** + * + */ +public class TestBloomFilter { + private static final int COUNT = 100; + Random rand = new Random(123); + + @Test(expected = IllegalArgumentException.class) + public void testBloomIllegalArg1() { + BloomFilter bf = new BloomFilter(0, 0); + } + + @Test(expected = IllegalArgumentException.class) + public void testBloomIllegalArg2() { + BloomFilter bf = new BloomFilter(0, 0.1); + } + + @Test(expected = IllegalArgumentException.class) + public void testBloomIllegalArg3() { + BloomFilter bf = new BloomFilter(1, 0.0); + } + + @Test(expected = IllegalArgumentException.class) + public void testBloomIllegalArg4() { + BloomFilter bf = new BloomFilter(1, 1.0); + } + + @Test(expected = IllegalArgumentException.class) + public void testBloomIllegalArg5() { + BloomFilter bf = new BloomFilter(-1, -1); + } + + + @Test + public void testBloomNumBits() { + assertEquals(0, BloomFilter.optimalNumOfBits(0, 0)); + assertEquals(0, BloomFilter.optimalNumOfBits(0, 1)); + assertEquals(0, BloomFilter.optimalNumOfBits(1, 1)); + assertEquals(7, BloomFilter.optimalNumOfBits(1, 0.03)); + assertEquals(72, BloomFilter.optimalNumOfBits(10, 0.03)); + assertEquals(729, BloomFilter.optimalNumOfBits(100, 0.03)); + assertEquals(7298, BloomFilter.optimalNumOfBits(1000, 0.03)); + assertEquals(72984, BloomFilter.optimalNumOfBits(10000, 0.03)); + assertEquals(729844, BloomFilter.optimalNumOfBits(100000, 0.03)); + assertEquals(7298440, BloomFilter.optimalNumOfBits(1000000, 0.03)); + assertEquals(6235224, BloomFilter.optimalNumOfBits(1000000, 0.05)); + } + + @Test + public void testBloomNumHashFunctions() { + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(-1, -1)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(0, 0)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 0)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 10)); + assertEquals(7, BloomFilter.optimalNumOfHashFunctions(10, 100)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100, 100)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000, 100)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10000, 100)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100000, 100)); + assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000000, 100)); + } + + @Test + public void testBloomFilterBytes() { + BloomFilter bf = new BloomFilter(10000); + byte[] val = new byte[]{1, 2, 3}; + byte[] val1 = new byte[]{1, 2, 3, 4}; + byte[] val2 = new byte[]{1, 2, 3, 4, 5}; + byte[] val3 = new byte[]{1, 2, 3, 4, 5, 6}; + + assertEquals(false, bf.test(val)); + assertEquals(false, bf.test(val1)); + assertEquals(false, bf.test(val2)); + assertEquals(false, bf.test(val3)); + bf.add(val); + assertEquals(true, bf.test(val)); + assertEquals(false, bf.test(val1)); + assertEquals(false, bf.test(val2)); + assertEquals(false, bf.test(val3)); + bf.add(val1); + assertEquals(true, bf.test(val)); + assertEquals(true, bf.test(val1)); + assertEquals(false, bf.test(val2)); + assertEquals(false, bf.test(val3)); + bf.add(val2); + assertEquals(true, bf.test(val)); + assertEquals(true, bf.test(val1)); + assertEquals(true, bf.test(val2)); + assertEquals(false, bf.test(val3)); + bf.add(val3); + assertEquals(true, bf.test(val)); + assertEquals(true, bf.test(val1)); + assertEquals(true, bf.test(val2)); + assertEquals(true, bf.test(val3)); + + byte[] randVal = new byte[COUNT]; + 
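The assertions in this test pin down the standard Bloom filter contract: an added key always tests true, so there are no false negatives, while an absent key tests false except with a small, configurable false-positive probability. A minimal usage sketch of the BloomFilter API this patch adds under org.apache.hadoop.hive.ql.io.filters follows; the single-argument constructor is assumed to apply a default fpp of 0.05, which is consistent with the 7800-byte sizeInBytes() assertions in these tests (10000 entries at fpp 0.05 needs roughly 62352 bits, and rounding up to a multiple of 64 gives 62400 bits, i.e. 7800 bytes).

import org.apache.hadoop.hive.ql.io.filters.BloomFilter;

public class BloomFilterContractSketch {
  public static void main(String[] args) {
    // 10000 expected entries, default false-positive probability (assumed 0.05).
    BloomFilter bf = new BloomFilter(10000);
    byte[] present = new byte[]{1, 2, 3};
    bf.add(present);
    System.out.println(bf.test(present));        // always true: no false negatives
    System.out.println(bf.test(new byte[]{9}));  // false with high probability:
                                                 // a positive answer is only "maybe"
  }
}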
for (int i = 0; i < COUNT; i++) { + rand.nextBytes(randVal); + bf.add(randVal); + } + // last value should be present + assertEquals(true, bf.test(randVal)); + // most likely this value should not exist + randVal[0] = 0; + randVal[1] = 0; + randVal[2] = 0; + randVal[3] = 0; + randVal[4] = 0; + assertEquals(false, bf.test(randVal)); + + assertEquals(7800, bf.sizeInBytes()); + } + + @Test + public void testBloomFilterByte() { + BloomFilter bf = new BloomFilter(10000); + byte val = Byte.MIN_VALUE; + byte val1 = 1; + byte val2 = 2; + byte val3 = Byte.MAX_VALUE; + + assertEquals(false, bf.testLong(val)); + assertEquals(false, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val); + assertEquals(true, bf.testLong(val)); + assertEquals(false, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val1); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val2); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(true, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val3); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(true, bf.testLong(val2)); + assertEquals(true, bf.testLong(val3)); + + byte randVal = 0; + for (int i = 0; i < COUNT; i++) { + randVal = (byte) rand.nextInt(Byte.MAX_VALUE); + bf.addLong(randVal); + } + // last value should be present + assertEquals(true, bf.testLong(randVal)); + // most likely this value should not exist + assertEquals(false, bf.testLong((byte) -120)); + + assertEquals(7800, bf.sizeInBytes()); + } + + @Test + public void testBloomFilterInt() { + BloomFilter bf = new BloomFilter(10000); + int val = Integer.MIN_VALUE; + int val1 = 1; + int val2 = 2; + int val3 = Integer.MAX_VALUE; + + assertEquals(false, bf.testLong(val)); + assertEquals(false, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val); + assertEquals(true, bf.testLong(val)); + assertEquals(false, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val1); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val2); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(true, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val3); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(true, bf.testLong(val2)); + assertEquals(true, bf.testLong(val3)); + + int randVal = 0; + for (int i = 0; i < COUNT; i++) { + randVal = rand.nextInt(); + bf.addLong(randVal); + } + // last value should be present + assertEquals(true, bf.testLong(randVal)); + // most likely this value should not exist + assertEquals(false, bf.testLong(-120)); + + assertEquals(7800, bf.sizeInBytes()); + } + + @Test + public void testBloomFilterLong() { + BloomFilter bf = new BloomFilter(10000); + long val = Long.MIN_VALUE; + long val1 = 1; + long val2 = 2; + long val3 = Long.MAX_VALUE; + + assertEquals(false, bf.testLong(val)); + assertEquals(false, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + 
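Note that the byte and int variants above go through addLong/testLong rather than any type-specific hashing: as the WriterImpl comment in this patch says, integer types are converted to longs in column statistics and during SARG evaluation, so a probe hits exactly when the widened long values agree. A small sketch under that assumption:

import org.apache.hadoop.hive.ql.io.filters.BloomFilter;

public class IntegerWideningSketch {
  public static void main(String[] args) {
    BloomFilter bf = new BloomFilter(10000);
    byte b = 42;
    bf.addLong(b);                        // widened to 42L before hashing
    System.out.println(bf.testLong(42));  // true: int 42 widens to the same 42L
    System.out.println(bf.testLong(43));  // false, modulo false positives
  }
}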
assertEquals(false, bf.testLong(val3)); + bf.addLong(val); + assertEquals(true, bf.testLong(val)); + assertEquals(false, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val1); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(false, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val2); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(true, bf.testLong(val2)); + assertEquals(false, bf.testLong(val3)); + bf.addLong(val3); + assertEquals(true, bf.testLong(val)); + assertEquals(true, bf.testLong(val1)); + assertEquals(true, bf.testLong(val2)); + assertEquals(true, bf.testLong(val3)); + + long randVal = 0; + for (int i = 0; i < COUNT; i++) { + randVal = rand.nextLong(); + bf.addLong(randVal); + } + // last value should be present + assertEquals(true, bf.testLong(randVal)); + // most likely this value should not exist + assertEquals(false, bf.testLong(-120)); + + assertEquals(7800, bf.sizeInBytes()); + } + + @Test + public void testBloomFilterFloat() { + BloomFilter bf = new BloomFilter(10000); + float val = Float.MIN_VALUE; + float val1 = 1.1f; + float val2 = 2.2f; + float val3 = Float.MAX_VALUE; + + assertEquals(false, bf.testDouble(val)); + assertEquals(false, bf.testDouble(val1)); + assertEquals(false, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val); + assertEquals(true, bf.testDouble(val)); + assertEquals(false, bf.testDouble(val1)); + assertEquals(false, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val1); + assertEquals(true, bf.testDouble(val)); + assertEquals(true, bf.testDouble(val1)); + assertEquals(false, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val2); + assertEquals(true, bf.testDouble(val)); + assertEquals(true, bf.testDouble(val1)); + assertEquals(true, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val3); + assertEquals(true, bf.testDouble(val)); + assertEquals(true, bf.testDouble(val1)); + assertEquals(true, bf.testDouble(val2)); + assertEquals(true, bf.testDouble(val3)); + + float randVal = 0; + for (int i = 0; i < COUNT; i++) { + randVal = rand.nextFloat(); + bf.addDouble(randVal); + } + // last value should be present + assertEquals(true, bf.testDouble(randVal)); + // most likely this value should not exist + assertEquals(false, bf.testDouble(-120.2f)); + + assertEquals(7800, bf.sizeInBytes()); + } + + @Test + public void testBloomFilterDouble() { + BloomFilter bf = new BloomFilter(10000); + double val = Double.MIN_VALUE; + double val1 = 1.1d; + double val2 = 2.2d; + double val3 = Double.MAX_VALUE; + + assertEquals(false, bf.testDouble(val)); + assertEquals(false, bf.testDouble(val1)); + assertEquals(false, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val); + assertEquals(true, bf.testDouble(val)); + assertEquals(false, bf.testDouble(val1)); + assertEquals(false, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val1); + assertEquals(true, bf.testDouble(val)); + assertEquals(true, bf.testDouble(val1)); + assertEquals(false, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); + bf.addDouble(val2); + assertEquals(true, bf.testDouble(val)); + assertEquals(true, bf.testDouble(val1)); + assertEquals(true, bf.testDouble(val2)); + assertEquals(false, bf.testDouble(val3)); 
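Likewise, testBloomFilterFloat adds floats through addDouble, mirroring the writer, which converts floats to doubles for column statistics and SARG evaluation. The float-to-double widening is exact but does not produce the same value as the corresponding double literal, which matters when probing; a sketch of the pitfall, using the same assumed API:

import org.apache.hadoop.hive.ql.io.filters.BloomFilter;

public class FloatWideningSketch {
  public static void main(String[] args) {
    BloomFilter bf = new BloomFilter(10000);
    bf.addDouble(1.1f);                       // stored as (double) 1.1f
    System.out.println(bf.testDouble(1.1f));  // true: same widening on both sides
    System.out.println(bf.testDouble(1.1d));  // almost certainly false:
                                              // 1.1d != (double) 1.1f
  }
}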
+ bf.addDouble(val3); + assertEquals(true, bf.testDouble(val)); + assertEquals(true, bf.testDouble(val1)); + assertEquals(true, bf.testDouble(val2)); + assertEquals(true, bf.testDouble(val3)); + + double randVal = 0; + for (int i = 0; i < COUNT; i++) { + randVal = rand.nextDouble(); + bf.addDouble(randVal); + } + // last value should be present + assertEquals(true, bf.testDouble(randVal)); + // most likely this value should not exist + assertEquals(false, bf.testDouble(-120.2d)); + + assertEquals(7800, bf.sizeInBytes()); + } + + @Test + public void testBloomFilterString() { + BloomFilter bf = new BloomFilter(100000); + String val = "bloo"; + String val1 = "bloom fil"; + String val2 = "bloom filter"; + String val3 = "cuckoo filter"; + + assertEquals(false, bf.testString(val)); + assertEquals(false, bf.testString(val1)); + assertEquals(false, bf.testString(val2)); + assertEquals(false, bf.testString(val3)); + bf.addString(val); + assertEquals(true, bf.testString(val)); + assertEquals(false, bf.testString(val1)); + assertEquals(false, bf.testString(val2)); + assertEquals(false, bf.testString(val3)); + bf.addString(val1); + assertEquals(true, bf.testString(val)); + assertEquals(true, bf.testString(val1)); + assertEquals(false, bf.testString(val2)); + assertEquals(false, bf.testString(val3)); + bf.addString(val2); + assertEquals(true, bf.testString(val)); + assertEquals(true, bf.testString(val1)); + assertEquals(true, bf.testString(val2)); + assertEquals(false, bf.testString(val3)); + bf.addString(val3); + assertEquals(true, bf.testString(val)); + assertEquals(true, bf.testString(val1)); + assertEquals(true, bf.testString(val2)); + assertEquals(true, bf.testString(val3)); + + long randVal = 0; + for (int i = 0; i < COUNT; i++) { + randVal = rand.nextLong(); + bf.addString(Long.toString(randVal)); + } + // last value should be present + assertEquals(true, bf.testString(Long.toString(randVal))); + // most likely this value should not exist + assertEquals(false, bf.testString(Long.toString(-120))); + + assertEquals(77944, bf.sizeInBytes()); + } + + @Test + public void testMerge() { + BloomFilter bf = new BloomFilter(10000); + String val = "bloo"; + String val1 = "bloom fil"; + String val2 = "bloom filter"; + String val3 = "cuckoo filter"; + bf.addString(val); + bf.addString(val1); + bf.addString(val2); + bf.addString(val3); + + BloomFilter bf2 = new BloomFilter(10000); + String v = "2_bloo"; + String v1 = "2_bloom fil"; + String v2 = "2_bloom filter"; + String v3 = "2_cuckoo filter"; + bf2.addString(v); + bf2.addString(v1); + bf2.addString(v2); + bf2.addString(v3); + + assertEquals(true, bf.testString(val)); + assertEquals(true, bf.testString(val1)); + assertEquals(true, bf.testString(val2)); + assertEquals(true, bf.testString(val3)); + assertEquals(false, bf.testString(v)); + assertEquals(false, bf.testString(v1)); + assertEquals(false, bf.testString(v2)); + assertEquals(false, bf.testString(v3)); + + bf.merge(bf2); + + assertEquals(true, bf.testString(val)); + assertEquals(true, bf.testString(val1)); + assertEquals(true, bf.testString(val2)); + assertEquals(true, bf.testString(val3)); + assertEquals(true, bf.testString(v)); + assertEquals(true, bf.testString(v1)); + assertEquals(true, bf.testString(v2)); + assertEquals(true, bf.testString(v3)); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java new file mode 100644 index 0000000..d92a3ce --- /dev/null +++ 
ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java @@ -0,0 +1,189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.filters; + +import static org.junit.Assert.assertEquals; + +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; + +import org.junit.Test; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Random; + +/** + * Tests for Murmur3 variants. + */ +public class TestMurmur3 { + + @Test + public void testHashCodesM3_32_string() { + String key = "test"; + int seed = 123; + HashFunction hf = Hashing.murmur3_32(seed); + int hc1 = hf.hashBytes(key.getBytes()).asInt(); + int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed); + assertEquals(hc1, hc2); + + key = "testkey"; + hc1 = hf.hashBytes(key.getBytes()).asInt(); + hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed); + assertEquals(hc1, hc2); + } + + @Test + public void testHashCodesM3_32_ints() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_32(seed); + for (int i = 0; i < 1000; i++) { + int val = rand.nextInt(); + byte[] data = ByteBuffer.allocate(4).putInt(val).array(); + int hc1 = hf.hashBytes(data).asInt(); + int hc2 = Murmur3.hash32(data, data.length, seed); + assertEquals(hc1, hc2); + } + } + + @Test + public void testHashCodesM3_32_longs() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_32(seed); + for (int i = 0; i < 1000; i++) { + long val = rand.nextLong(); + byte[] data = ByteBuffer.allocate(8).putLong(val).array(); + int hc1 = hf.hashBytes(data).asInt(); + int hc2 = Murmur3.hash32(data, data.length, seed); + assertEquals(hc1, hc2); + } + } + + @Test + public void testHashCodesM3_32_double() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_32(seed); + for (int i = 0; i < 1000; i++) { + double val = rand.nextDouble(); + byte[] data = ByteBuffer.allocate(8).putDouble(val).array(); + int hc1 = hf.hashBytes(data).asInt(); + int hc2 = Murmur3.hash32(data, data.length, seed); + assertEquals(hc1, hc2); + } + } + + @Test + public void testHashCodesM3_128_string() { + String key = "test"; + int seed = 123; + HashFunction hf = Hashing.murmur3_128(seed); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(key.getBytes()).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + + key = "testkey128_testkey128"; + buf = 
ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(key.getBytes()).asBytes()); + buf.flip(); + gl1 = buf.getLong(); + gl2 = buf.getLong(8); + hc = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed); + m1 = hc[0]; + m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + } + + @Test + public void testHashCodesM3_128_ints() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_128(seed); + for (int i = 0; i < 1000; i++) { + int val = rand.nextInt(); + byte[] data = ByteBuffer.allocate(4).putInt(val).array(); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(data).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(data, data.length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + } + } + + @Test + public void testHashCodesM3_128_longs() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_128(seed); + for (int i = 0; i < 1000; i++) { + long val = rand.nextLong(); + byte[] data = ByteBuffer.allocate(8).putLong(val).array(); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(data).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(data, data.length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + } + } + + @Test + public void testHashCodesM3_128_double() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_128(seed); + for (int i = 0; i < 1000; i++) { + double val = rand.nextDouble(); + byte[] data = ByteBuffer.allocate(8).putDouble(val).array(); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(data).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(data, data.length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java index e451143..00afdac 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java @@ -21,19 +21,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hive.common.util.HiveTestUtils; -import org.junit.Before; -import org.junit.Test; - import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.File; @@ -49,6 +36,19 @@ import java.util.Map; import java.util.Random; +import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hive.common.util.HiveTestUtils; +import org.junit.Before; +import org.junit.Test; + public class TestFileDump { Path workDir = new Path(System.getProperty("test.tmp.dir")); @@ -303,4 +303,101 @@ public void testDictionaryThreshold() throws Exception { checkOutput(outputFilename, workDir + File.separator + outputFilename); } + + @Test + public void testBloomFilter() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .inspector(inspector) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("s"); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + for(int i=0; i < 21000; ++i) { + writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)])); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-bloomfilter.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testBloomFilter2() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .inspector(inspector) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("l") + .bloomFilterFpp(0.01); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", 
"of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + for(int i=0; i < 21000; ++i) { + writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)])); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-bloomfilter2.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 5c48d0b..cd1d645 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -515,6 +515,10 @@ public void testTimestamp() throws Exception { Object row = rows.next(null); assertEquals(tslist.get(idx++).getNanos(), ((TimestampWritable) row).getNanos()); } + assertEquals(1, OrcUtils.getFlattenedColumnsCount(inspector)); + boolean[] expected = new boolean[] {false}; + boolean[] included = OrcUtils.includeColumns("", "ts", inspector); + assertEquals(true, Arrays.equals(expected, included)); } @Test @@ -538,6 +542,19 @@ public void testStringAndBinaryStatistics() throws Exception { Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector)); + boolean[] expected = new boolean[] {false, false, true}; + boolean[] included = OrcUtils.includeColumns("string1", "bytes1,string1", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, false, false}; + included = OrcUtils.includeColumns("", "bytes1,string1", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, false, false}; + included = OrcUtils.includeColumns(null, "bytes1,string1", inspector); + assertEquals(true, Arrays.equals(expected, included)); + // check the stats ColumnStatistics[] stats = reader.getStatistics(); assertEquals(4, stats[0].getNumberOfValues()); @@ -634,6 +651,12 @@ public void testStripeLevelStats() throws Exception { writer.close(); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + + assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector)); + boolean[] expected = new boolean[] {false, true, false}; + boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector); + assertEquals(true, Arrays.equals(expected, included)); + Metadata metadata = reader.getMetadata(); int numStripes = metadata.getStripeStatistics().size(); assertEquals(3, numStripes); @@ -672,7 +695,7 @@ public void 
testStripeLevelStats() throws Exception { assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum()); RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows(); - OrcProto.RowIndex[] index = recordReader.readRowIndex(0); + OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null).getRowGroupIndex(); assertEquals(3, index.length); List items = index[1].getEntryList(); assertEquals(1, items.size()); @@ -682,7 +705,7 @@ public void testStripeLevelStats() throws Exception { assertEquals(0, items.get(0).getPositions(2)); assertEquals(1, items.get(0).getStatistics().getIntStatistics().getMinimum()); - index = recordReader.readRowIndex(1); + index = recordReader.readRowIndex(1, null).getRowGroupIndex(); assertEquals(3, index.length); items = index[1].getEntryList(); assertEquals(2, @@ -715,6 +738,44 @@ public void test1() throws Exception { Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + assertEquals(24, OrcUtils.getFlattenedColumnsCount(inspector)); + boolean[] expected = new boolean[] {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false}; + boolean[] included = OrcUtils.includeColumns("", + "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, true, false, false, false, + false, false, false, false, true, + true, true, true, true, true, + false, false, false, false, true, + true, true, true, true}; + included = OrcUtils.includeColumns("boolean1,string1,middle,map", + "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, true, false, false, false, + false, false, false, false, true, + true, true, true, true, true, + false, false, false, false, true, + true, true, true, true}; + included = OrcUtils.includeColumns("boolean1,string1,middle,map", + "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true}; + included = OrcUtils.includeColumns( + "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", + "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector); + assertEquals(true, Arrays.equals(expected, included)); + Metadata metadata = reader.getMetadata(); // check the stats @@ -1183,6 +1244,20 @@ public void testUnionAndTimestamp() throws Exception { writer.close(); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + + assertEquals(6, OrcUtils.getFlattenedColumnsCount(inspector)); + boolean[] expected = new boolean[] {false, false, false, false, false, false}; + boolean[] included = OrcUtils.includeColumns("", "time,union,decimal", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, true, false, false, false, true}; + included = OrcUtils.includeColumns("time,decimal", "time,union,decimal", inspector); + assertEquals(true, Arrays.equals(expected, included)); + + expected = new boolean[] {false, 
false, true, true, true, false}; + included = OrcUtils.includeColumns("union", "time,union,decimal", inspector); + assertEquals(true, Arrays.equals(expected, included)); + assertEquals(false, reader.getMetadataKeys().iterator().hasNext()); assertEquals(5309, reader.getNumberOfRows()); DecimalColumnStatistics stats = diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index a86d19f..db0ce0e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -320,27 +320,27 @@ public void testPredEvalWithIntStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null)); } @Test @@ -348,27 +348,27 @@ public void testPredEvalWithDoubleStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred)); + RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred)); + RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred)); + RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", 
new DateWritable(15), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred)); + RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred)); + RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null)); } @Test @@ -376,27 +376,27 @@ public void testPredEvalWithStringStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 100, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 100.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "100", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(100), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(100), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null)); } @Test @@ -404,57 +404,57 @@ public void testPredEvalWithDateStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "1970-01-11", null); 
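Every evaluatePredicate call site in this file gains a third argument, presumably the column's bloom filter for the row group; these tests pass null because they exercise min/max statistics only. Conceptually a bloom filter can only tighten a stats-based answer, never loosen it: a negative probe for an equality literal is definitive, while a positive probe proves nothing. A hedged sketch of that refinement step (the helper name and the simplified null handling are illustrative, not this patch's actual RecordReaderImpl internals):

import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

final class BloomFilterRefineSketch {
  // statsAnswer: the TruthValue already derived from min/max statistics.
  // Ignores the _NULL-suffixed variants for brevity.
  static TruthValue refineEquals(TruthValue statsAnswer, BloomFilter bf, long literal) {
    if (bf == null || statsAnswer == TruthValue.NO) {
      return statsAnswer;                 // nothing to refine, as in these tests
    }
    // Negative probe: the literal is definitely absent from this row group.
    return bf.testLong(literal) ? statsAnswer : TruthValue.NO;
  }
}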
assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15.1", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "__a15__1", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "2000-01-16", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "1970-01-16", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(150), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred)); + RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null)); } @@ -463,27 +463,27 @@ public void testPredEvalWithDecimalStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred)); + RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred)); + RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred)); + RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, 
null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred)); + RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred)); + RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null)); } @@ -493,17 +493,17 @@ public void testEquals() throws Exception { (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred, null)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred, null)); assertEquals(TruthValue.YES, - RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred, null)); } @Test @@ -512,17 +512,17 @@ public void testNullSafeEquals() throws Exception { (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred, null)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred, null)); } @Test @@ -531,15 +531,15 @@ public void testLessThan() throws Exception { (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan)); + RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan, null)); assertEquals(TruthValue.NO, - 
RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan, null)); assertEquals(TruthValue.YES, - RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan)); + RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan, null)); } @Test @@ -548,15 +548,15 @@ public void testLessThanEquals() throws Exception { (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null)); assertEquals(TruthValue.YES, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred, null)); assertEquals(TruthValue.YES, - RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred, null)); } @Test @@ -568,13 +568,13 @@ public void testIn() throws Exception { (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER, "x", null, args); assertEquals(TruthValue.YES, - RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred, null)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred, null)); } @Test @@ -586,19 +586,19 @@ public void testBetween() throws Exception { (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER, "x", null, args); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred, null)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred)); + RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred, null)); assertEquals(TruthValue.YES_NO, - 
RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred));
+        RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred, null));
     assertEquals(TruthValue.YES,
-        RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred));
+        RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred, null));
     assertEquals(TruthValue.YES,
-        RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
+        RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred, null));
   }
 
   @Test
@@ -607,7 +607,7 @@ public void testIsNull() throws Exception {
         (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER, "x", null, null);
     assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+        RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null));
   }
 
@@ -617,17 +617,17 @@ public void testEqualsWithNullInStats() throws Exception {
         (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", "c", null);
     assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
     assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
     assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
   }
 
   @Test
@@ -636,17 +636,17 @@ public void testNullSafeEqualsWithNullInStats() throws Exception {
         (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "c", null);
     assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
     assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
     assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
     assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
     assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
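
The hunks above and below all make one mechanical change: RecordReaderImpl.evaluatePredicate now takes a third argument, and these statistics-only tests pass null for it. Judging by the BLOOM_FILTER streams and the HIVE_8732 marker in the dump files added later in this patch, that argument is presumably the row group's bloom filter. The sketch below (hypothetical names and hashing, not Hive's actual API) shows the pruning such an argument enables: min/max statistics alone can only answer YES_NO for an equality predicate whose value lies inside the range, while a bloom filter that has never seen the value sharpens the answer to NO, letting the reader skip the row group. Hive's real TruthValue also carries *_NULL variants, omitted here.

import java.util.BitSet;

public class BloomPruneSketch {
  enum TruthValue { YES, NO, YES_NO }

  // Hypothetical stand-in for one row group's bloom filter: k hash functions
  // over a fixed-size bitset, sized like the dumps below (numHashFunctions: 4,
  // bitCount: 6272). The hashing here is illustrative, not ORC's.
  static final class Bloom {
    final BitSet bits;
    final int numHashFunctions;

    Bloom(int numBits, int k) {
      bits = new BitSet(numBits);
      numHashFunctions = k;
    }

    void add(long v) {
      for (int i = 0; i < numHashFunctions; i++) {
        bits.set(pos(v, i));
      }
    }

    boolean mightContain(long v) {
      for (int i = 0; i < numHashFunctions; i++) {
        if (!bits.get(pos(v, i))) {
          return false;          // one clear bit proves the value was never added
        }
      }
      return true;               // all bits set: the value *might* be present
    }

    private int pos(long v, int i) {
      long h = (v + i) * 0x9E3779B97F4A7C15L;   // cheap per-function mix, illustration only
      return (int) Math.floorMod(h ^ (h >>> 32), (long) bits.size());
    }
  }

  // Hypothetical analogue of evaluatePredicate(stats, pred, bloomFilter)
  // for the single leaf predicate "x = value" over integer statistics.
  static TruthValue evaluateEquals(long min, long max, long value, Bloom bloom) {
    if (value < min || value > max) {
      return TruthValue.NO;      // min/max statistics alone prune the row group
    }
    if (min == max) {
      return TruthValue.YES;     // single-valued row group, and value is in [min, max]
    }
    if (bloom != null && !bloom.mightContain(value)) {
      return TruthValue.NO;      // in range, but the filter never saw this value
    }
    return TruthValue.YES_NO;    // cannot decide without reading the rows
  }

  public static void main(String[] args) {
    Bloom bloom = new Bloom(6272, 4);
    bloom.add(10L);
    bloom.add(30L);
    // Stats-only, as in the tests above (null bloom filter): best answer is YES_NO.
    System.out.println(evaluateEquals(10L, 30L, 15L, null));   // YES_NO
    // With a filter that never saw 15, the row group is skipped (barring a false positive).
    System.out.println(evaluateEquals(10L, 30L, 15L, bloom));  // NO
  }
}

As a side note, the bloom filter statistics printed in the new .out files are consistent with expectedFpp = loadFactor ^ numHashFunctions: 0.022^4 ≈ 2.34e-7 for the dictionary-encoded string column, and 0.5136^7 ≈ 0.0094 for the bigint column.
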
     assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
   }
 
   @Test
@@ -655,17 +655,17 @@ public void testLessThanWithNullInStats() throws Exception {
         (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", "c", null);
     assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
     assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
     assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
     assertEquals(TruthValue.NO_NULL, // min, same stats
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null));
   }
 
   @Test
@@ -674,17 +674,17 @@ public void testLessThanEqualsWithNullInStats() throws Exception {
         (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", "c", null);
     assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
     assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
     assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
     assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
   }
 
   @Test
@@ -696,17 +696,17 @@ public void testInWithNullInStats() throws Exception {
         (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING, "x", null, args);
     assertEquals(TruthValue.NO_NULL, // before & after
-        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred,
null)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred)); // max + RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred, null)); // max assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same + RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same } @Test @@ -718,31 +718,31 @@ public void testBetweenWithNullInStats() throws Exception { (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING, "x", null, args); assertEquals(TruthValue.YES_NULL, // before & after - RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); assertEquals(TruthValue.YES_NULL, // before & max - RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred, null)); assertEquals(TruthValue.NO_NULL, // before & before - RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred, null)); assertEquals(TruthValue.YES_NO_NULL, // before & min - RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred, null)); assertEquals(TruthValue.YES_NO_NULL, // before & middle - RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred, null)); assertEquals(TruthValue.YES_NULL, // min & after - RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred, null)); assertEquals(TruthValue.YES_NULL, // min & max - RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred, null)); assertEquals(TruthValue.YES_NO_NULL, // min & middle - RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred)); + RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred, null)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred)); // max + RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred, null)); // max assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), 
pred)); // middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
     assertEquals(TruthValue.YES_NULL, // min & after, same stats
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null));
   }
 
   @Test
@@ -751,9 +751,9 @@ public void testIsNullWithNullInStats() throws Exception {
         (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, "x", null, null);
     assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred));
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null));
     assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred));
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred, null));
   }
 
   @Test
diff --git ql/src/test/resources/orc-file-dump-bloomfilter.out ql/src/test/resources/orc-file-dump-bloomfilter.out
new file mode 100644
index 0000000..add163c
--- /dev/null
+++ ql/src/test/resources/orc-file-dump-bloomfilter.out
@@ -0,0 +1,177 @@
+Structure for TestFileDump.testDump.orc
+File Version: 0.12 with HIVE_8732
+Rows: 21000
+Compression: ZLIB
+Compression size: 10000
+Type: struct<i:int,l:bigint,s:string>
+
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826
+    Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+  Stripe 2:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427
+    Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+  Stripe 3:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551
+    Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+  Stripe 4:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236
+    Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+  Stripe 5:
+    Column 0: count: 1000 hasNull: false
+    Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363
+    Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476
+    Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
+
+File Statistics:
+  Column 0: count: 21000 hasNull: false
+  Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403
+  Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266
+  Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
+
+Stripes:
+  Stripe: offset: 3 data: 63765 rows: 5000 tail: 86 index: 845
+    Stream: column 0 section ROW_INDEX start: 3 length 17
+    Stream: column 1 section ROW_INDEX start: 20 length 164
+    Stream: column 2 section ROW_INDEX start: 184 length 173
+    Stream: column 3 section ROW_INDEX start: 357 length 87
+    Stream: column 3 section BLOOM_FILTER start: 444 length 404
+    Stream: column 1
section DATA start: 848 length 20029 + Stream: column 2 section DATA start: 20877 length 40035 + Stream: column 3 section DATA start: 60912 length 3543 + Stream: column 3 section LENGTH start: 64455 length 25 + Stream: column 3 section DICTIONARY_DATA start: 64480 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 64699 data: 63754 rows: 5000 tail: 86 index: 837 + Stream: column 0 section ROW_INDEX start: 64699 length 17 + Stream: column 1 section ROW_INDEX start: 64716 length 162 + Stream: column 2 section ROW_INDEX start: 64878 length 171 + Stream: column 3 section ROW_INDEX start: 65049 length 83 + Stream: column 3 section BLOOM_FILTER start: 65132 length 404 + Stream: column 1 section DATA start: 65536 length 20029 + Stream: column 2 section DATA start: 85565 length 40035 + Stream: column 3 section DATA start: 125600 length 3532 + Stream: column 3 section LENGTH start: 129132 length 25 + Stream: column 3 section DICTIONARY_DATA start: 129157 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 
129376 data: 63766 rows: 5000 tail: 86 index: 841 + Stream: column 0 section ROW_INDEX start: 129376 length 17 + Stream: column 1 section ROW_INDEX start: 129393 length 159 + Stream: column 2 section ROW_INDEX start: 129552 length 171 + Stream: column 3 section ROW_INDEX start: 129723 length 90 + Stream: column 3 section BLOOM_FILTER start: 129813 length 404 + Stream: column 1 section DATA start: 130217 length 20029 + Stream: column 2 section DATA start: 150246 length 40035 + Stream: column 3 section DATA start: 190281 length 3544 + Stream: column 3 section LENGTH start: 193825 length 25 + Stream: column 3 section DICTIONARY_DATA start: 193850 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 194069 data: 63796 rows: 5000 tail: 86 index: 844 + Stream: column 0 section ROW_INDEX start: 194069 length 17 + Stream: column 1 section ROW_INDEX start: 194086 length 162 + Stream: column 2 section ROW_INDEX start: 194248 length 170 + Stream: column 3 section ROW_INDEX start: 194418 length 91 + Stream: column 3 section BLOOM_FILTER start: 194509 length 404 + Stream: column 1 section DATA start: 194913 length 20029 + Stream: column 2 section DATA start: 214942 length 40035 + Stream: column 3 section DATA start: 254977 length 3574 + Stream: column 3 section LENGTH start: 258551 length 25 + Stream: column 3 section DICTIONARY_DATA start: 258576 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 
6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+  Stripe: offset: 258795 data: 12940 rows: 1000 tail: 78 index: 432
+    Stream: column 0 section ROW_INDEX start: 258795 length 12
+    Stream: column 1 section ROW_INDEX start: 258807 length 38
+    Stream: column 2 section ROW_INDEX start: 258845 length 41
+    Stream: column 3 section ROW_INDEX start: 258886 length 40
+    Stream: column 3 section BLOOM_FILTER start: 258926 length 301
+    Stream: column 1 section DATA start: 259227 length 4007
+    Stream: column 2 section DATA start: 263234 length 8007
+    Stream: column 3 section DATA start: 271241 length 768
+    Stream: column 3 section LENGTH start: 272009 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 272034 length 133
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT_V2
+    Encoding column 2: DIRECT_V2
+    Encoding column 3: DICTIONARY_V2[35]
+    Row group indices for column 3:
+      Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
+    Bloom filters for column 3:
+      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+
+File length: 272790 bytes
+Padding length: 0 bytes
+Padding ratio: 0%
diff --git ql/src/test/resources/orc-file-dump-bloomfilter2.out ql/src/test/resources/orc-file-dump-bloomfilter2.out
new file mode 100644
index 0000000..06b65ce
--- /dev/null
+++ ql/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -0,0 +1,177 @@
+Structure for TestFileDump.testDump.orc
+File Version: 0.12 with HIVE_8732
+Rows: 21000
+Compression: ZLIB
+Compression size: 10000
+Type: struct<i:int,l:bigint,s:string>
+
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826
+    Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+  Stripe 2:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427
+    Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+  Stripe 3:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551
+    Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+  Stripe 4:
+    Column 0: count: 5000 hasNull: false
+    Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236
+    Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406
+    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+  Stripe 5:
+    Column 0: count: 1000 hasNull: false
+    Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363
+    Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476
+    Column 3: count: 1000 hasNull: false min: Darkness,
max: worst sum: 3866 + +File Statistics: + Column 0: count: 21000 hasNull: false + Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403 + Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + +Stripes: + Stripe: offset: 3 data: 63765 rows: 5000 tail: 85 index: 6935 + Stream: column 0 section ROW_INDEX start: 3 length 17 + Stream: column 1 section ROW_INDEX start: 20 length 164 + Stream: column 2 section ROW_INDEX start: 184 length 173 + Stream: column 2 section BLOOM_FILTER start: 357 length 6494 + Stream: column 3 section ROW_INDEX start: 6851 length 87 + Stream: column 1 section DATA start: 6938 length 20029 + Stream: column 2 section DATA start: 26967 length 40035 + Stream: column 3 section DATA start: 67002 length 3543 + Stream: column 3 section LENGTH start: 70545 length 25 + Stream: column 3 section DICTIONARY_DATA start: 70570 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488 + Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464 + Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440 + Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4931 loadFactor: 0.5136 expectedFpp: 0.009432924 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4956 loadFactor: 0.5163 expectedFpp: 0.009772834 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 0.829482 + Stripe: offset: 70788 data: 63754 rows: 5000 tail: 85 index: 6917 + Stream: column 0 section ROW_INDEX start: 70788 length 17 + Stream: column 1 section ROW_INDEX start: 70805 length 162 + Stream: column 2 section ROW_INDEX start: 70967 length 171 + Stream: column 2 section BLOOM_FILTER start: 71138 length 6484 + Stream: column 3 section ROW_INDEX start: 77622 length 83 + Stream: column 1 section DATA start: 77705 length 20029 + Stream: column 2 section DATA start: 97734 length 40035 + Stream: column 3 section DATA start: 137769 length 3532 + Stream: column 3 section LENGTH start: 141301 length 25 + Stream: column 3 section DICTIONARY_DATA start: 141326 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488 + Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 
10003,2294,464 + Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440 + Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4988 loadFactor: 0.5196 expectedFpp: 0.010223193 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205 + Stripe: offset: 141544 data: 63766 rows: 5000 tail: 85 index: 6924 + Stream: column 0 section ROW_INDEX start: 141544 length 17 + Stream: column 1 section ROW_INDEX start: 141561 length 159 + Stream: column 2 section ROW_INDEX start: 141720 length 171 + Stream: column 2 section BLOOM_FILTER start: 141891 length 6487 + Stream: column 3 section ROW_INDEX start: 148378 length 90 + Stream: column 1 section DATA start: 148468 length 20029 + Stream: column 2 section DATA start: 168497 length 40035 + Stream: column 3 section DATA start: 208532 length 3544 + Stream: column 3 section LENGTH start: 212076 length 25 + Stream: column 3 section DICTIONARY_DATA start: 212101 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488 + Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464 + Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440 + Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4967 loadFactor: 0.5174 expectedFpp: 0.009925688 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4964 loadFactor: 0.5171 expectedFpp: 0.009883798 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 0.5135 expectedFpp: 0.009419539 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444 + Stripe: offset: 212319 data: 63796 rows: 5000 tail: 85 index: 6925 + Stream: column 0 section ROW_INDEX start: 212319 length 17 + Stream: column 1 section ROW_INDEX start: 212336 length 162 + Stream: column 2 section ROW_INDEX start: 212498 length 170 + Stream: column 2 section BLOOM_FILTER start: 212668 length 6485 + Stream: column 3 section ROW_INDEX start: 219153 length 91 + Stream: column 1 section DATA start: 219244 length 20029 + Stream: column 2 section DATA start: 239273 length 40035 + Stream: column 3 section DATA start: 279308 length 3574 + Stream: 
column 3 section LENGTH start: 282882 length 25 + Stream: column 3 section DICTIONARY_DATA start: 282907 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488 + Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464 + Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440 + Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4951 loadFactor: 0.5157 expectedFpp: 0.009704026 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4969 loadFactor: 0.5176 expectedFpp: 0.009953696 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4994 loadFactor: 0.5202 expectedFpp: 0.010309587 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165 + Stripe: offset: 283125 data: 12940 rows: 1000 tail: 78 index: 1468 + Stream: column 0 section ROW_INDEX start: 283125 length 12 + Stream: column 1 section ROW_INDEX start: 283137 length 38 + Stream: column 2 section ROW_INDEX start: 283175 length 41 + Stream: column 2 section BLOOM_FILTER start: 283216 length 1337 + Stream: column 3 section ROW_INDEX start: 284553 length 40 + Stream: column 1 section DATA start: 284593 length 4007 + Stream: column 2 section DATA start: 288600 length 8007 + Stream: column 3 section DATA start: 296607 length 768 + Stream: column 3 section LENGTH start: 297375 length 25 + Stream: column 3 section DICTIONARY_DATA start: 297400 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 + +File length: 298155 bytes +Padding length: 0 bytes +Padding ratio: 0% diff --git ql/src/test/resources/orc-file-dump-dictionary-threshold.out ql/src/test/resources/orc-file-dump-dictionary-threshold.out index ac74e51..13e316e 100644 --- ql/src/test/resources/orc-file-dump-dictionary-threshold.out +++ ql/src/test/resources/orc-file-dump-dictionary-threshold.out @@ -52,24 +52,24 @@ Stripes: Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Row group index column 1: - Entry 0:count: 1000 hasNull: false min: -2132329551 max: 2145911404 sum: 61941331718 positions: 0,0,0 - Entry 1:count: 1000 hasNull: false min: -2138433136 max: 2145210552 sum: 14574030042 positions: 0,2050,488 - Entry 2:count: 1000 hasNull: false min: -2147115959 max: 2137805337 sum: -2032493169 positions: 0,6150,464 - Entry 3:count: 
1000 hasNull: false min: -2137828953 max: 2145877119 sum: -3167202608 positions: 10003,250,440 - Entry 4:count: 1000 hasNull: false min: -2146452517 max: 2142394906 sum: 88361503212 positions: 10003,4350,416 - Row group index column 2: - Entry 0:count: 1000 hasNull: false min: -9206837518492372266 max: 9169230975203934579 positions: 0,0,0 - Entry 1:count: 1000 hasNull: false min: -9188878639954124284 max: 9213664245516510068 positions: 0,4098,488 - Entry 2:count: 1000 hasNull: false min: -9211329013123260308 max: 9217851628057711416 positions: 10003,2294,464 - Entry 3:count: 1000 hasNull: false min: -9185745718227889962 max: 9181722705210917931 positions: 20006,490,440 - Entry 4:count: 1000 hasNull: false min: -9216505819108477308 max: 9196474183833079923 positions: 20006,8686,416 - Row group index column 3: - Entry 0:count: 1000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996 sum: 18442 positions: 0,0,0,0,0 - Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 sum: 46338 positions: 2777,8442,0,695,18 - Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 sum: 75448 positions: 13595,4780,0,1554,14 - Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 104868 positions: 31432,228,0,2372,90 - Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 136158 positions: 54111,5096,0,3354,108 + Row group indices for column 1: + Entry 0: count: 1000 hasNull: false min: -2132329551 max: 2145911404 sum: 61941331718 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -2138433136 max: 2145210552 sum: 14574030042 positions: 0,2050,488 + Entry 2: count: 1000 hasNull: false min: -2147115959 max: 2137805337 sum: -2032493169 positions: 0,6150,464 + Entry 3: count: 1000 hasNull: false min: -2137828953 max: 2145877119 sum: -3167202608 positions: 10003,250,440 + Entry 4: count: 1000 hasNull: false min: -2146452517 max: 2142394906 sum: 88361503212 positions: 10003,4350,416 + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9206837518492372266 max: 9169230975203934579 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9188878639954124284 max: 9213664245516510068 positions: 0,4098,488 + Entry 2: count: 1000 hasNull: false min: -9211329013123260308 max: 9217851628057711416 positions: 10003,2294,464 + Entry 3: count: 1000 hasNull: false min: -9185745718227889962 max: 9181722705210917931 positions: 20006,490,440 + Entry 4: count: 1000 hasNull: false min: -9216505819108477308 max: 9196474183833079923 positions: 20006,8686,416 + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996 sum: 18442 positions: 0,0,0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 sum: 46338 positions: 
2777,8442,0,695,18 + Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 sum: 75448 positions: 13595,4780,0,1554,14 + Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 104868 positions: 31432,228,0,2372,90 + Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 136158 positions: 54111,5096,0,3354,108 Stripe: offset: 151897 data: 336358 rows: 5000 tail: 69 index: 954 Stream: column 0 section ROW_INDEX start: 151897 length 17 Stream: column 1 section ROW_INDEX start: 151914 length 153 @@ -83,24 +83,24 @@ Stripes: Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Row group index column 1: - Entry 0:count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -50979197646 positions: 0,0,0 - Entry 1:count: 1000 hasNull: false min: -2143569489 max: 2141223179 sum: 22810066834 positions: 0,2050,488 - Entry 2:count: 1000 hasNull: false min: -2140649392 max: 2146301701 sum: -31694882346 positions: 0,6150,464 - Entry 3:count: 1000 hasNull: false min: -2147390285 max: 2146299933 sum: 79371934221 positions: 10003,250,440 - Entry 4:count: 1000 hasNull: false min: -2145928262 max: 2147224606 sum: -34469378822 positions: 10003,4350,416 - Row group index column 2: - Entry 0:count: 1000 hasNull: false min: -9222178666167296739 max: 9191250610515369723 positions: 0,0,0 - Entry 1:count: 1000 hasNull: false min: -9220148577547102875 max: 9213945522531717278 positions: 0,4098,488 - Entry 2:count: 1000 hasNull: false min: -9220818777591257749 max: 9221301751385928177 positions: 10003,2294,464 - Entry 3:count: 1000 hasNull: false min: -9220031433030423388 max: 9207856144487414148 positions: 20006,490,440 - Entry 4:count: 1000 hasNull: false min: -9201438531577205959 max: 9212462124593119846 positions: 20006,8686,416 - Row group index column 3: - Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726 sum: 166320 positions: 0,0,0,0,0 - Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994 sum: 193436 positions: 37112,6320,0,967,90 - Entry 2:count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 sum: 224740 positions: 80822,9756,0,1945,222 - Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 252094 positions: 137149,4496,0,3268,48 - Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 281404 positions: 197972,6590,0,4064,342 + Row group indices for column 1: + Entry 0: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -50979197646 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -2143569489 max: 2141223179 sum: 22810066834 positions: 0,2050,488 + Entry 2: count: 1000 hasNull: false min: -2140649392 max: 2146301701 sum: -31694882346 positions: 0,6150,464 + Entry 3: count: 1000 hasNull: false min: -2147390285 max: 2146299933 sum: 79371934221 positions: 10003,250,440 + Entry 4: count: 1000 hasNull: false min: -2145928262 max: 2147224606 sum: -34469378822 positions: 10003,4350,416 + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9222178666167296739 max: 9191250610515369723 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9220148577547102875 max: 9213945522531717278 positions: 0,4098,488 + Entry 2: count: 1000 hasNull: false min: -9220818777591257749 max: 9221301751385928177 positions: 10003,2294,464 + Entry 3: count: 1000 hasNull: false min: -9220031433030423388 max: 9207856144487414148 positions: 20006,490,440 + Entry 4: count: 1000 hasNull: false min: -9201438531577205959 max: 9212462124593119846 positions: 20006,8686,416 + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726 sum: 166320 positions: 0,0,0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994 sum: 193436 positions: 37112,6320,0,967,90 + Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 sum: 224740 positions: 80822,9756,0,1945,222 + Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 252094 positions: 137149,4496,0,3268,48 + Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 281404 positions: 197972,6590,0,4064,342 Stripe: offset: 489278 data: 558031 rows: 5000 tail: 69 index: 1181 Stream: column 0 section ROW_INDEX start: 489278 length 17 Stream: column 1 section ROW_INDEX start: 489295 length 166 @@ -114,24 +114,24 @@ Stripes: Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Row group index column 1: - Entry 0:count: 1000 hasNull: false min: -2138229212 max: 2144818981 sum: -22823642812 positions: 0,0,0 - Entry 1:count: 1000 hasNull: false min: -2145842720 max: 2144179881 sum: -12562754334 positions: 0,2050,488 - Entry 2:count: 1000 hasNull: false min: -2143045885 max: 2146718321 sum: 82993638644 positions: 0,6150,464 - Entry 3:count: 1000 hasNull: false min: -2144745617 max: 2146570474 sum: 25138722367 positions: 10003,250,440 - Entry 4:count: 1000 hasNull: false min: -2140127150 max: 
2135081620 sum: 68346511655 positions: 10003,4350,416 - Row group index column 2: - Entry 0:count: 1000 hasNull: false min: -9204340807292138409 max: 9208698732685326961 positions: 0,0,0 - Entry 1:count: 1000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 positions: 0,4098,488 - Entry 2:count: 1000 hasNull: false min: -9210480084701091299 max: 9207767402467343058 positions: 10003,2294,464 - Entry 3:count: 1000 hasNull: false min: -9195038026813631215 max: 9199201928563274421 positions: 20006,490,440 - Entry 4:count: 1000 hasNull: false min: -9215483580266514322 max: 9220102792864959501 positions: 20006,8686,416 - Row group index column 3: - Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876 sum: 313880 positions: 0,0,0,0,0 - Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964 sum: 349542 positions: 80352,3880,0,1097,28 - Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 sum: 386538 positions: 170641,3422,0,2077,162 - Entry 3:count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 421660 positions: 268420,9960,0,3369,16
- Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 453606 positions: 377916,1620,0,4041,470
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2138229212 max: 2144818981 sum: -22823642812 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2145842720 max: 2144179881 sum: -12562754334 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2143045885 max: 2146718321 sum: 82993638644 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2144745617 max: 2146570474 sum: 25138722367 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2140127150 max: 2135081620 sum: 68346511655 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9204340807292138409 max: 9208698732685326961 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9210480084701091299 max: 9207767402467343058 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9195038026813631215 max: 9199201928563274421 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9215483580266514322 max: 9220102792864959501 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876 sum: 313880 positions: 0,0,0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964 sum: 349542 positions: 80352,3880,0,1097,28
+ Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 sum: 386538 positions: 170641,3422,0,2077,162
+ Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 421660 positions: 268420,9960,0,3369,16
+ Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 453606 positions: 377916,1620,0,4041,470
 Stripe: offset: 1048559 data: 792850 rows: 5000 tail: 69 index: 1369
 Stream: column 0 section ROW_INDEX start: 1048559 length 17
 Stream: column 1 section ROW_INDEX start: 1048576 length 153
@@ -145,24 +145,24 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2145319330 max: 2146998132 sum: -50856753363 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2134288866 max: 2147453086 sum: -17911019023 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2139010804 max: 2144727593 sum: -24993151857 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2145378214 max: 2144098933 sum: -18055164052 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2140494429 max: 2144595861 sum: -41863916235 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9172774601303513941 max: 9212917101275642143 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9218164880949195469 max: 9222919052987871506 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9196276654247395117 max: 9210639275226058005 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9197393848859294562 max: 9208134757538374043 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610 sum: 492916 positions: 0,0,0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936 sum: 527290 positions: 126968,2916,0,1077,140
- Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 sum: 568274 positions: 263111,206,0,1926,462
- Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 594578 positions: 407371,8480,0,3444,250
- Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 631944 positions: 562094,3058,0,4643,292
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2145319330 max: 2146998132 sum: -50856753363 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2134288866 max: 2147453086 sum: -17911019023 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2139010804 max: 2144727593 sum: -24993151857 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2145378214 max: 2144098933 sum: -18055164052 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2140494429 max: 2144595861 sum: -41863916235 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9172774601303513941 max: 9212917101275642143 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9218164880949195469 max: 9222919052987871506 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9196276654247395117 max: 9210639275226058005 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9197393848859294562 max: 9208134757538374043 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610 sum: 492916 positions: 0,0,0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936 sum: 527290 positions: 126968,2916,0,1077,140
+ Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 sum: 568274 positions: 263111,206,0,1926,462
+ Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 594578 positions: 407371,8480,0,3444,250
+ Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 631944 positions: 562094,3058,0,4643,292
 Stripe: offset: 1842847 data: 188033 rows: 1000 tail: 67 index: 841
 Stream: column 0 section ROW_INDEX start: 1842847 length 12
 Stream: column 1 section ROW_INDEX start: 1842859 length 38
@@ -176,12 +176,12 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 positions: 0,0,0
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 positions: 0,0,0
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 positions: 0,0,0
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 positions: 0,0,0
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0
 File length: 2033643 bytes
 Padding length: 0 bytes
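[Editor's note on the dump lines above: each "Entry N:" line is rendered from one OrcProto.RowIndexEntry, pairing the row group's column statistics with the positions needed to seek to it. A minimal sketch of that rendering, using the generated OrcProto API; formatEntry is a hypothetical helper for illustration, not the patched FileDump code, and the hasNull accessor is assumed from the field shown in the output:]

import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class RowIndexEntryFormatter {
  // Renders a line like "Entry 0: count: 1000 hasNull: false min: ... positions: 0,2050,488"
  static String formatEntry(int n, OrcProto.RowIndexEntry entry) {
    OrcProto.ColumnStatistics stats = entry.getStatistics();
    StringBuilder sb = new StringBuilder();
    sb.append("Entry ").append(n).append(": count: ").append(stats.getNumberOfValues());
    if (stats.hasHasNull()) {
      sb.append(" hasNull: ").append(stats.getHasNull());
    }
    if (stats.hasIntStatistics()) {
      OrcProto.IntegerStatistics ints = stats.getIntStatistics();
      sb.append(" min: ").append(ints.getMinimum())
        .append(" max: ").append(ints.getMaximum());
      if (ints.hasSum()) {
        sb.append(" sum: ").append(ints.getSum());
      }
    }
    // The positions tuple locates the row group inside the column's streams.
    sb.append(" positions: ");
    for (int i = 0; i < entry.getPositionsCount(); ++i) {
      if (i != 0) {
        sb.append(',');
      }
      sb.append(entry.getPositions(i));
    }
    return sb.toString();
  }
}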
diff --git ql/src/test/resources/orc-file-dump.out ql/src/test/resources/orc-file-dump.out
index cfeea24..2f5962b 100644
--- ql/src/test/resources/orc-file-dump.out
+++ ql/src/test/resources/orc-file-dump.out
@@ -53,24 +53,24 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: -25521983511 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: -25521983511 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
 Stripe: offset: 64288 data: 63754 rows: 5000 tail: 79 index: 433
 Stream: column 0 section ROW_INDEX start: 64288 length 17
 Stream: column 1 section ROW_INDEX start: 64305 length 162
@@ -85,24 +85,24 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
 Stripe: offset: 128554 data: 63766 rows: 5000 tail: 79 index: 437
 Stream: column 0 section ROW_INDEX start: 128554 length 17
 Stream: column 1 section ROW_INDEX start: 128571 length 159
@@ -117,24 +117,24 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: -7829543271 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: -7829543271 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
 Stripe: offset: 192836 data: 63796 rows: 5000 tail: 79 index: 440
 Stream: column 0 section ROW_INDEX start: 192836 length 17
 Stream: column 1 section ROW_INDEX start: 192853 length 162
@@ -149,24 +149,24 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: -23606674002 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: -23606674002 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
 Stripe: offset: 257151 data: 12940 rows: 1000 tail: 71 index: 131
 Stream: column 0 section ROW_INDEX start: 257151 length 12
 Stream: column 1 section ROW_INDEX start: 257163 length 38
@@ -181,12 +181,12 @@ Stripes:
 Encoding column 1: DIRECT_V2
 Encoding column 2: DIRECT_V2
 Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
 File length: 270838 bytes
 Padding length: 0 bytes
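[Editor's note: the Stream lines above give the byte range of each column's ROW_INDEX section, so an index can also be pulled out of the file directly and parsed with the generated message. A minimal sketch, assuming the file was written with compression NONE; with ZLIB or SNAPPY the section bytes would need to be decompressed before parsing. readRowIndex is a hypothetical helper:]

import java.io.IOException;
import java.io.RandomAccessFile;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class RowIndexReader {
  // Reads one column's ROW_INDEX section, e.g. "start: 64305 length 162"
  // from the dump above, and parses it into OrcProto.RowIndex.
  static OrcProto.RowIndex readRowIndex(RandomAccessFile file,
      long start, int length) throws IOException {
    byte[] buf = new byte[length];
    file.seek(start);
    file.readFully(buf);
    return OrcProto.RowIndex.parseFrom(buf);
  }
}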
diff --git ql/src/test/resources/orc-file-has-null.out ql/src/test/resources/orc-file-has-null.out
index f1dfcd3..fdc3862 100644
--- ql/src/test/resources/orc-file-has-null.out
+++ ql/src/test/resources/orc-file-has-null.out
@@ -42,12 +42,12 @@ Stripes:
 Encoding column 0: DIRECT
 Encoding column 1: DIRECT_V2
 Encoding column 2: DICTIONARY_V2[2]
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0
- Entry 1:count: 0 hasNull: true positions: 0,0,125,0,0,66,488
- Entry 2:count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
- Entry 3:count: 0 hasNull: true positions: 0,4,125,0,0,136,488
- Entry 4:count: 0 hasNull: true positions: 0,6,125,0,0,136,488
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0
+ Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488
+ Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
+ Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488
+ Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488
 Stripe: offset: 424 data: 156 rows: 5000 tail: 60 index: 119
 Stream: column 0 section ROW_INDEX start: 424 length 17
 Stream: column 1 section ROW_INDEX start: 441 length 63
@@ -61,12 +61,12 @@ Stripes:
 Encoding column 0: DIRECT
 Encoding column 1: DIRECT_V2
 Encoding column 2: DICTIONARY_V2[0]
- Row group index column 2:
- Entry 0:count: 0 hasNull: true positions: 0,0,0,0,0,0,0
- Entry 1:count: 0 hasNull: true positions: 0,0,125,0,0,0,0
- Entry 2:count: 0 hasNull: true positions: 0,2,120,0,0,0,0
- Entry 3:count: 0 hasNull: true positions: 0,4,115,0,0,0,0
- Entry 4:count: 0 hasNull: true positions: 0,6,110,0,0,0,0
+ Row group indices for column 2:
+ Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0
+ Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0
+ Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0
+ Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
+ Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
 Stripe: offset: 759 data: 186 rows: 5000 tail: 60 index: 148
 Stream: column 0 section ROW_INDEX start: 759 length 17
 Stream: column 1 section ROW_INDEX start: 776 length 63
@@ -79,12 +79,12 @@ Stripes:
 Encoding column 0: DIRECT
 Encoding column 1: DIRECT_V2
 Encoding column 2: DICTIONARY_V2[1]
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488
- Entry 2:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
- Entry 3:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
- Entry 4:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488
+ Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
+ Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
+ Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
 Stripe: offset: 1153 data: 156 rows: 5000 tail: 60 index: 119
 Stream: column 0 section ROW_INDEX start: 1153 length 17
 Stream: column 1 section ROW_INDEX start: 1170 length 63
@@ -98,12 +98,12 @@ Stripes:
 Encoding column 0: DIRECT
 Encoding column 1: DIRECT_V2
 Encoding column 2: DICTIONARY_V2[0]
- Row group index column 2:
- Entry 0:count: 0 hasNull: true positions: 0,0,0,0,0,0,0
- Entry 1:count: 0 hasNull: true positions: 0,0,125,0,0,0,0
- Entry 2:count: 0 hasNull: true positions: 0,2,120,0,0,0,0
- Entry 3:count: 0 hasNull: true positions: 0,4,115,0,0,0,0
- Entry 4:count: 0 hasNull: true positions: 0,6,110,0,0,0,0
+ Row group indices for column 2:
+ Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0
+ Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0
+ Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0
+ Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
+ Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
 File length: 1736 bytes
 Padding length: 0 bytes
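[Editor's note: the orc-file-has-null.out entries above show why hasNull is tracked per row group: a group with count: 0 and hasNull: true contains only nulls and can be skipped outright for a non-null predicate. A minimal sketch of that check; canSkipForNotNull is a hypothetical helper for illustration, not Hive's predicate push-down code, and the hasNull accessor is assumed from the field shown in the output:]

import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class NullRowGroupFilter {
  // A row group whose statistics report no values but hasNull == true
  // (e.g. "Entry 1: count: 0 hasNull: true" above) holds only nulls,
  // so it cannot satisfy an IS NOT NULL predicate and may be skipped.
  static boolean canSkipForNotNull(OrcProto.ColumnStatistics stats) {
    return stats.getNumberOfValues() == 0
        && stats.hasHasNull() && stats.getHasNull();
  }
}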