diff --git ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
index 7a96373..d33d66e 100644
--- ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
+++ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
@@ -1256,6 +1256,24 @@ public Builder clearSum() {
*/
com.google.protobuf.ByteString
getMaximumBytes();
+
+ // optional sint64 sum = 3;
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ boolean hasSum();
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ long getSum();
}
/**
* Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.StringStatistics}
@@ -1318,6 +1336,11 @@ private StringStatistics(
maximum_ = input.readBytes();
break;
}
+ case 24: {
+ bitField0_ |= 0x00000004;
+ sum_ = input.readSInt64();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -1444,9 +1467,34 @@ public boolean hasMaximum() {
}
}
+ // optional sint64 sum = 3;
+ public static final int SUM_FIELD_NUMBER = 3;
+ private long sum_;
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ public boolean hasSum() {
+ return ((bitField0_ & 0x00000004) == 0x00000004);
+ }
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ public long getSum() {
+ return sum_;
+ }
+
private void initFields() {
minimum_ = "";
maximum_ = "";
+ sum_ = 0L;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -1466,6 +1514,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output)
if (((bitField0_ & 0x00000002) == 0x00000002)) {
output.writeBytes(2, getMaximumBytes());
}
+ if (((bitField0_ & 0x00000004) == 0x00000004)) {
+ output.writeSInt64(3, sum_);
+ }
getUnknownFields().writeTo(output);
}
@@ -1483,6 +1534,10 @@ public int getSerializedSize() {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(2, getMaximumBytes());
}
+ if (((bitField0_ & 0x00000004) == 0x00000004)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeSInt64Size(3, sum_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -1603,6 +1658,8 @@ public Builder clear() {
bitField0_ = (bitField0_ & ~0x00000001);
maximum_ = "";
bitField0_ = (bitField0_ & ~0x00000002);
+ sum_ = 0L;
+ bitField0_ = (bitField0_ & ~0x00000004);
return this;
}
@@ -1639,6 +1696,10 @@ public Builder clone() {
to_bitField0_ |= 0x00000002;
}
result.maximum_ = maximum_;
+ if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+ to_bitField0_ |= 0x00000004;
+ }
+ result.sum_ = sum_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -1665,6 +1726,9 @@ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatist
maximum_ = other.maximum_;
onChanged();
}
+ if (other.hasSum()) {
+ setSum(other.getSum());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -1840,6 +1904,55 @@ public Builder setMaximumBytes(
return this;
}
+ // optional sint64 sum = 3;
+ private long sum_ ;
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ public boolean hasSum() {
+ return ((bitField0_ & 0x00000004) == 0x00000004);
+ }
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ public long getSum() {
+ return sum_;
+ }
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ public Builder setSum(long value) {
+ bitField0_ |= 0x00000004;
+ sum_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * optional sint64 sum = 3;
+ *
+ *
+ * sum will store the total length of all strings in a stripe
+ *
+ */
+ public Builder clearSum() {
+ bitField0_ = (bitField0_ & ~0x00000004);
+ sum_ = 0L;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.StringStatistics)
}
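Reviewer note (illustrative, not part of the generated code): the regenerated StringStatistics API above adds hasSum/getSum plus the builder's setSum/clearSum. A minimal usage sketch, assuming the writer has already accumulated the total string length for the stripe; the literal values are placeholders:

    // Assumes: import org.apache.hadoop.hive.ql.io.orc.OrcProto;

    // Hypothetical writer side: record min/max and the new per-stripe length sum.
    OrcProto.StringStatistics stringStats = OrcProto.StringStatistics.newBuilder()
        .setMinimum("apple")      // example minimum value
        .setMaximum("zebra")      // example maximum value
        .setSum(12345L)           // total length of all strings in the stripe (example)
        .build();

    // Hypothetical reader side: guard on hasSum(), since files written before this
    // change do not carry field 3.
    long totalLength = stringStats.hasSum() ? stringStats.getSum() : -1L;

Because the field is optional, readers built from the old descriptor keep parsing such files and simply carry field 3 in their unknown-field set.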
@@ -3279,15 +3392,492 @@ public boolean hasMaximum() {
return ((bitField0_ & 0x00000002) == 0x00000002);
}
/**
- * optional sint32 maximum = 2;
+ * optional sint32 maximum = 2;
+ */
+ public int getMaximum() {
+ return maximum_;
+ }
+
+ private void initFields() {
+ minimum_ = 0;
+ maximum_ = 0;
+ }
+ private byte memoizedIsInitialized = -1;
+ public final boolean isInitialized() {
+ byte isInitialized = memoizedIsInitialized;
+ if (isInitialized != -1) return isInitialized == 1;
+
+ memoizedIsInitialized = 1;
+ return true;
+ }
+
+ public void writeTo(com.google.protobuf.CodedOutputStream output)
+ throws java.io.IOException {
+ getSerializedSize();
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ output.writeSInt32(1, minimum_);
+ }
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ output.writeSInt32(2, maximum_);
+ }
+ getUnknownFields().writeTo(output);
+ }
+
+ private int memoizedSerializedSize = -1;
+ public int getSerializedSize() {
+ int size = memoizedSerializedSize;
+ if (size != -1) return size;
+
+ size = 0;
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeSInt32Size(1, minimum_);
+ }
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeSInt32Size(2, maximum_);
+ }
+ size += getUnknownFields().getSerializedSize();
+ memoizedSerializedSize = size;
+ return size;
+ }
+
+ private static final long serialVersionUID = 0L;
+ @java.lang.Override
+ protected java.lang.Object writeReplace()
+ throws java.io.ObjectStreamException {
+ return super.writeReplace();
+ }
+
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ com.google.protobuf.ByteString data)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ com.google.protobuf.ByteString data,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data, extensionRegistry);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(byte[] data)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ byte[] data,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data, extensionRegistry);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(java.io.InputStream input)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ java.io.InputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input, extensionRegistry);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseDelimitedFrom(java.io.InputStream input)
+ throws java.io.IOException {
+ return PARSER.parseDelimitedFrom(input);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseDelimitedFrom(
+ java.io.InputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ return PARSER.parseDelimitedFrom(input, extensionRegistry);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ com.google.protobuf.CodedInputStream input)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input);
+ }
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input, extensionRegistry);
+ }
+
+ public static Builder newBuilder() { return Builder.create(); }
+ public Builder newBuilderForType() { return newBuilder(); }
+ public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics prototype) {
+ return newBuilder().mergeFrom(prototype);
+ }
+ public Builder toBuilder() { return newBuilder(this); }
+
+ @java.lang.Override
+ protected Builder newBuilderForType(
+ com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+ Builder builder = new Builder(parent);
+ return builder;
+ }
+ /**
+ * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.DateStatistics}
+ */
+ public static final class Builder extends
+        com.google.protobuf.GeneratedMessage.Builder<Builder>
+ implements org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatisticsOrBuilder {
+ public static final com.google.protobuf.Descriptors.Descriptor
+ getDescriptor() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_descriptor;
+ }
+
+ protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+ internalGetFieldAccessorTable() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_fieldAccessorTable
+ .ensureFieldAccessorsInitialized(
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.Builder.class);
+ }
+
+ // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.newBuilder()
+ private Builder() {
+ maybeForceBuilderInitialization();
+ }
+
+ private Builder(
+ com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+ super(parent);
+ maybeForceBuilderInitialization();
+ }
+ private void maybeForceBuilderInitialization() {
+ if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+ }
+ }
+ private static Builder create() {
+ return new Builder();
+ }
+
+ public Builder clear() {
+ super.clear();
+ minimum_ = 0;
+ bitField0_ = (bitField0_ & ~0x00000001);
+ maximum_ = 0;
+ bitField0_ = (bitField0_ & ~0x00000002);
+ return this;
+ }
+
+ public Builder clone() {
+ return create().mergeFrom(buildPartial());
+ }
+
+ public com.google.protobuf.Descriptors.Descriptor
+ getDescriptorForType() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_descriptor;
+ }
+
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics getDefaultInstanceForType() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance();
+ }
+
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics build() {
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics result = buildPartial();
+ if (!result.isInitialized()) {
+ throw newUninitializedMessageException(result);
+ }
+ return result;
+ }
+
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics buildPartial() {
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics(this);
+ int from_bitField0_ = bitField0_;
+ int to_bitField0_ = 0;
+ if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
+ to_bitField0_ |= 0x00000001;
+ }
+ result.minimum_ = minimum_;
+ if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+ to_bitField0_ |= 0x00000002;
+ }
+ result.maximum_ = maximum_;
+ result.bitField0_ = to_bitField0_;
+ onBuilt();
+ return result;
+ }
+
+ public Builder mergeFrom(com.google.protobuf.Message other) {
+ if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics) {
+ return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics)other);
+ } else {
+ super.mergeFrom(other);
+ return this;
+ }
+ }
+
+ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics other) {
+ if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance()) return this;
+ if (other.hasMinimum()) {
+ setMinimum(other.getMinimum());
+ }
+ if (other.hasMaximum()) {
+ setMaximum(other.getMaximum());
+ }
+ this.mergeUnknownFields(other.getUnknownFields());
+ return this;
+ }
+
+ public final boolean isInitialized() {
+ return true;
+ }
+
+ public Builder mergeFrom(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parsedMessage = null;
+ try {
+ parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+ } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+ parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics) e.getUnfinishedMessage();
+ throw e;
+ } finally {
+ if (parsedMessage != null) {
+ mergeFrom(parsedMessage);
+ }
+ }
+ return this;
+ }
+ private int bitField0_;
+
+ // optional sint32 minimum = 1;
+ private int minimum_ ;
+ /**
+ * optional sint32 minimum = 1;
+ *
+ *
+ * min,max values saved as days since epoch
+ *
+ */
+ public boolean hasMinimum() {
+ return ((bitField0_ & 0x00000001) == 0x00000001);
+ }
+ /**
+ * optional sint32 minimum = 1;
+ *
+ *
+ * min,max values saved as days since epoch
+ *
+ */
+ public int getMinimum() {
+ return minimum_;
+ }
+ /**
+ * optional sint32 minimum = 1;
+ *
+ *
+ * min,max values saved as days since epoch
+ *
+ */
+ public Builder setMinimum(int value) {
+ bitField0_ |= 0x00000001;
+ minimum_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * optional sint32 minimum = 1;
+ *
+ *
+ * min,max values saved as days since epoch
+ *
+ */
+ public Builder clearMinimum() {
+ bitField0_ = (bitField0_ & ~0x00000001);
+ minimum_ = 0;
+ onChanged();
+ return this;
+ }
+
+ // optional sint32 maximum = 2;
+ private int maximum_ ;
+ /**
+ * optional sint32 maximum = 2;
+ */
+ public boolean hasMaximum() {
+ return ((bitField0_ & 0x00000002) == 0x00000002);
+ }
+ /**
+ * optional sint32 maximum = 2;
+ */
+ public int getMaximum() {
+ return maximum_;
+ }
+ /**
+ * optional sint32 maximum = 2;
+ */
+ public Builder setMaximum(int value) {
+ bitField0_ |= 0x00000002;
+ maximum_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * optional sint32 maximum = 2;
+ */
+ public Builder clearMaximum() {
+ bitField0_ = (bitField0_ & ~0x00000002);
+ maximum_ = 0;
+ onChanged();
+ return this;
+ }
+
+ // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.DateStatistics)
+ }
+
+ static {
+ defaultInstance = new DateStatistics(true);
+ defaultInstance.initFields();
+ }
+
+ // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.DateStatistics)
+ }
+
+ public interface BinaryStatisticsOrBuilder
+ extends com.google.protobuf.MessageOrBuilder {
+
+ // optional sint64 sum = 1;
+ /**
+ * optional sint64 sum = 1;
+ *
+ *
+ * sum will store the total binary blob length in a stripe
+ *
+ */
+ boolean hasSum();
+ /**
+ * optional sint64 sum = 1;
+ *
+ *
+ * sum will store the total binary blob length in a stripe
+ *
+ */
+ long getSum();
+ }
+ /**
+ * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.BinaryStatistics}
+ */
+ public static final class BinaryStatistics extends
+ com.google.protobuf.GeneratedMessage
+ implements BinaryStatisticsOrBuilder {
+ // Use BinaryStatistics.newBuilder() to construct.
+    private BinaryStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+ super(builder);
+ this.unknownFields = builder.getUnknownFields();
+ }
+ private BinaryStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+ private static final BinaryStatistics defaultInstance;
+ public static BinaryStatistics getDefaultInstance() {
+ return defaultInstance;
+ }
+
+ public BinaryStatistics getDefaultInstanceForType() {
+ return defaultInstance;
+ }
+
+ private final com.google.protobuf.UnknownFieldSet unknownFields;
+ @java.lang.Override
+ public final com.google.protobuf.UnknownFieldSet
+ getUnknownFields() {
+ return this.unknownFields;
+ }
+ private BinaryStatistics(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ initFields();
+ int mutable_bitField0_ = 0;
+ com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+ com.google.protobuf.UnknownFieldSet.newBuilder();
+ try {
+ boolean done = false;
+ while (!done) {
+ int tag = input.readTag();
+ switch (tag) {
+ case 0:
+ done = true;
+ break;
+ default: {
+ if (!parseUnknownField(input, unknownFields,
+ extensionRegistry, tag)) {
+ done = true;
+ }
+ break;
+ }
+ case 8: {
+ bitField0_ |= 0x00000001;
+ sum_ = input.readSInt64();
+ break;
+ }
+ }
+ }
+ } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(this);
+ } catch (java.io.IOException e) {
+ throw new com.google.protobuf.InvalidProtocolBufferException(
+ e.getMessage()).setUnfinishedMessage(this);
+ } finally {
+ this.unknownFields = unknownFields.build();
+ makeExtensionsImmutable();
+ }
+ }
+ public static final com.google.protobuf.Descriptors.Descriptor
+ getDescriptor() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_descriptor;
+ }
+
+ protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+ internalGetFieldAccessorTable() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_fieldAccessorTable
+ .ensureFieldAccessorsInitialized(
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder.class);
+ }
+
+    public static com.google.protobuf.Parser<BinaryStatistics> PARSER =
+        new com.google.protobuf.AbstractParser<BinaryStatistics>() {
+ public BinaryStatistics parsePartialFrom(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return new BinaryStatistics(input, extensionRegistry);
+ }
+ };
+
+ @java.lang.Override
+    public com.google.protobuf.Parser<BinaryStatistics> getParserForType() {
+ return PARSER;
+ }
+
+ private int bitField0_;
+ // optional sint64 sum = 1;
+ public static final int SUM_FIELD_NUMBER = 1;
+ private long sum_;
+ /**
+ * optional sint64 sum = 1;
+ *
+ *
+ * sum will store the total binary blob length in a stripe
+ *
+ */
+ public boolean hasSum() {
+ return ((bitField0_ & 0x00000001) == 0x00000001);
+ }
+ /**
+ * optional sint64 sum = 1;
+ *
+ *
+ * sum will store the total binary blob length in a stripe
+ *
*/
- public int getMaximum() {
- return maximum_;
+ public long getSum() {
+ return sum_;
}
private void initFields() {
- minimum_ = 0;
- maximum_ = 0;
+ sum_ = 0L;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -3302,10 +3892,7 @@ public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
getSerializedSize();
if (((bitField0_ & 0x00000001) == 0x00000001)) {
- output.writeSInt32(1, minimum_);
- }
- if (((bitField0_ & 0x00000002) == 0x00000002)) {
- output.writeSInt32(2, maximum_);
+ output.writeSInt64(1, sum_);
}
getUnknownFields().writeTo(output);
}
@@ -3318,11 +3905,7 @@ public int getSerializedSize() {
size = 0;
if (((bitField0_ & 0x00000001) == 0x00000001)) {
size += com.google.protobuf.CodedOutputStream
- .computeSInt32Size(1, minimum_);
- }
- if (((bitField0_ & 0x00000002) == 0x00000002)) {
- size += com.google.protobuf.CodedOutputStream
- .computeSInt32Size(2, maximum_);
+ .computeSInt64Size(1, sum_);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
@@ -3336,53 +3919,53 @@ public int getSerializedSize() {
return super.writeReplace();
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(byte[] data)
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(java.io.InputStream input)
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(java.io.InputStream input)
throws java.io.IOException {
return PARSER.parseFrom(input);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return PARSER.parseFrom(input, extensionRegistry);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseDelimitedFrom(java.io.InputStream input)
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return PARSER.parseDelimitedFrom(input);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseDelimitedFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return PARSER.parseDelimitedFrom(input, extensionRegistry);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return PARSER.parseFrom(input);
}
- public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parseFrom(
+ public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
@@ -3391,7 +3974,7 @@ public int getSerializedSize() {
public static Builder newBuilder() { return Builder.create(); }
public Builder newBuilderForType() { return newBuilder(); }
- public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics prototype) {
+ public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics prototype) {
return newBuilder().mergeFrom(prototype);
}
public Builder toBuilder() { return newBuilder(this); }
@@ -3403,24 +3986,24 @@ protected Builder newBuilderForType(
return builder;
}
/**
- * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.DateStatistics}
+ * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.BinaryStatistics}
*/
public static final class Builder extends
        com.google.protobuf.GeneratedMessage.Builder<Builder>
- implements org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatisticsOrBuilder {
+ implements org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
- return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_descriptor;
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_descriptor;
}
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
- return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_fieldAccessorTable
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_fieldAccessorTable
.ensureFieldAccessorsInitialized(
- org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.Builder.class);
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder.class);
}
- // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.newBuilder()
+ // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.newBuilder()
private Builder() {
maybeForceBuilderInitialization();
}
@@ -3440,10 +4023,8 @@ private static Builder create() {
public Builder clear() {
super.clear();
- minimum_ = 0;
+ sum_ = 0L;
bitField0_ = (bitField0_ & ~0x00000001);
- maximum_ = 0;
- bitField0_ = (bitField0_ & ~0x00000002);
return this;
}
@@ -3453,54 +4034,47 @@ public Builder clone() {
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
- return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_descriptor;
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_descriptor;
}
- public org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics getDefaultInstanceForType() {
- return org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance();
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics getDefaultInstanceForType() {
+ return org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
}
- public org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics build() {
- org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics result = buildPartial();
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics build() {
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
- public org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics buildPartial() {
- org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics(this);
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics buildPartial() {
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics(this);
int from_bitField0_ = bitField0_;
int to_bitField0_ = 0;
if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
to_bitField0_ |= 0x00000001;
}
- result.minimum_ = minimum_;
- if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
- to_bitField0_ |= 0x00000002;
- }
- result.maximum_ = maximum_;
+ result.sum_ = sum_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
}
public Builder mergeFrom(com.google.protobuf.Message other) {
- if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics) {
- return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics)other);
+ if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics) {
+ return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics)other);
} else {
super.mergeFrom(other);
return this;
}
}
- public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics other) {
- if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance()) return this;
- if (other.hasMinimum()) {
- setMinimum(other.getMinimum());
- }
- if (other.hasMaximum()) {
- setMaximum(other.getMaximum());
+ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics other) {
+ if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance()) return this;
+ if (other.hasSum()) {
+ setSum(other.getSum());
}
this.mergeUnknownFields(other.getUnknownFields());
return this;
@@ -3514,11 +4088,11 @@ public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
- org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics parsedMessage = null;
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics parsedMessage = null;
try {
parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
- parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics) e.getUnfinishedMessage();
+ parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics) e.getUnfinishedMessage();
throw e;
} finally {
if (parsedMessage != null) {
@@ -3529,97 +4103,64 @@ public Builder mergeFrom(
}
private int bitField0_;
- // optional sint32 minimum = 1;
- private int minimum_ ;
+ // optional sint64 sum = 1;
+ private long sum_ ;
/**
- * optional sint32 minimum = 1;
+ * optional sint64 sum = 1;
*
*
- * min,max values saved as days since epoch
+ * sum will store the total binary blob length in a stripe
*
*/
- public boolean hasMinimum() {
+ public boolean hasSum() {
return ((bitField0_ & 0x00000001) == 0x00000001);
}
/**
- * optional sint32 minimum = 1;
+ * optional sint64 sum = 1;
*
*
- * min,max values saved as days since epoch
+ * sum will store the total binary blob length in a stripe
*
*/
- public int getMinimum() {
- return minimum_;
+ public long getSum() {
+ return sum_;
}
/**
- * optional sint32 minimum = 1;
+ * optional sint64 sum = 1;
*
*
- * min,max values saved as days since epoch
+ * sum will store the total binary blob length in a stripe
*
*/
- public Builder setMinimum(int value) {
+ public Builder setSum(long value) {
bitField0_ |= 0x00000001;
- minimum_ = value;
+ sum_ = value;
onChanged();
return this;
}
/**
- * optional sint32 minimum = 1;
+ * optional sint64 sum = 1;
*
*
- * min,max values saved as days since epoch
+ * sum will store the total binary blob length in a stripe
*
*/
- public Builder clearMinimum() {
+ public Builder clearSum() {
bitField0_ = (bitField0_ & ~0x00000001);
- minimum_ = 0;
- onChanged();
- return this;
- }
-
- // optional sint32 maximum = 2;
- private int maximum_ ;
- /**
- * optional sint32 maximum = 2;
- */
- public boolean hasMaximum() {
- return ((bitField0_ & 0x00000002) == 0x00000002);
- }
- /**
- * optional sint32 maximum = 2;
- */
- public int getMaximum() {
- return maximum_;
- }
- /**
- * optional sint32 maximum = 2;
- */
- public Builder setMaximum(int value) {
- bitField0_ |= 0x00000002;
- maximum_ = value;
- onChanged();
- return this;
- }
- /**
- * optional sint32 maximum = 2;
- */
- public Builder clearMaximum() {
- bitField0_ = (bitField0_ & ~0x00000002);
- maximum_ = 0;
+ sum_ = 0L;
onChanged();
return this;
}
- // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.DateStatistics)
+ // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.BinaryStatistics)
}
static {
- defaultInstance = new DateStatistics(true);
+ defaultInstance = new BinaryStatistics(true);
defaultInstance.initFields();
}
- // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.DateStatistics)
+ // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.BinaryStatistics)
}
public interface ColumnStatisticsOrBuilder
@@ -3718,6 +4259,20 @@ public Builder clearMaximum() {
* optional .org.apache.hadoop.hive.ql.io.orc.DateStatistics dateStatistics = 7;
*/
org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatisticsOrBuilder getDateStatisticsOrBuilder();
+
+ // optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ boolean hasBinaryStatistics();
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics getBinaryStatistics();
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder getBinaryStatisticsOrBuilder();
}
/**
* Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.ColumnStatistics}
@@ -3853,6 +4408,19 @@ private ColumnStatistics(
bitField0_ |= 0x00000040;
break;
}
+ case 66: {
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder subBuilder = null;
+ if (((bitField0_ & 0x00000080) == 0x00000080)) {
+ subBuilder = binaryStatistics_.toBuilder();
+ }
+ binaryStatistics_ = input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.PARSER, extensionRegistry);
+ if (subBuilder != null) {
+ subBuilder.mergeFrom(binaryStatistics_);
+ binaryStatistics_ = subBuilder.buildPartial();
+ }
+ bitField0_ |= 0x00000080;
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -4041,6 +4609,28 @@ public boolean hasDateStatistics() {
return dateStatistics_;
}
+ // optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ public static final int BINARYSTATISTICS_FIELD_NUMBER = 8;
+ private org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics binaryStatistics_;
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public boolean hasBinaryStatistics() {
+ return ((bitField0_ & 0x00000080) == 0x00000080);
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics getBinaryStatistics() {
+ return binaryStatistics_;
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder getBinaryStatisticsOrBuilder() {
+ return binaryStatistics_;
+ }
+
private void initFields() {
numberOfValues_ = 0L;
intStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance();
@@ -4049,6 +4639,7 @@ private void initFields() {
bucketStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDefaultInstance();
decimalStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance();
dateStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance();
+ binaryStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -4083,6 +4674,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output)
if (((bitField0_ & 0x00000040) == 0x00000040)) {
output.writeMessage(7, dateStatistics_);
}
+ if (((bitField0_ & 0x00000080) == 0x00000080)) {
+ output.writeMessage(8, binaryStatistics_);
+ }
getUnknownFields().writeTo(output);
}
@@ -4120,6 +4714,10 @@ public int getSerializedSize() {
size += com.google.protobuf.CodedOutputStream
.computeMessageSize(7, dateStatistics_);
}
+ if (((bitField0_ & 0x00000080) == 0x00000080)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeMessageSize(8, binaryStatistics_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -4234,6 +4832,7 @@ private void maybeForceBuilderInitialization() {
getBucketStatisticsFieldBuilder();
getDecimalStatisticsFieldBuilder();
getDateStatisticsFieldBuilder();
+ getBinaryStatisticsFieldBuilder();
}
}
private static Builder create() {
@@ -4280,6 +4879,12 @@ public Builder clear() {
dateStatisticsBuilder_.clear();
}
bitField0_ = (bitField0_ & ~0x00000040);
+ if (binaryStatisticsBuilder_ == null) {
+ binaryStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
+ } else {
+ binaryStatisticsBuilder_.clear();
+ }
+ bitField0_ = (bitField0_ & ~0x00000080);
return this;
}
@@ -4360,6 +4965,14 @@ public Builder clone() {
} else {
result.dateStatistics_ = dateStatisticsBuilder_.build();
}
+ if (((from_bitField0_ & 0x00000080) == 0x00000080)) {
+ to_bitField0_ |= 0x00000080;
+ }
+ if (binaryStatisticsBuilder_ == null) {
+ result.binaryStatistics_ = binaryStatistics_;
+ } else {
+ result.binaryStatistics_ = binaryStatisticsBuilder_.build();
+ }
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -4397,6 +5010,9 @@ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatist
if (other.hasDateStatistics()) {
mergeDateStatistics(other.getDateStatistics());
}
+ if (other.hasBinaryStatistics()) {
+ mergeBinaryStatistics(other.getBinaryStatistics());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -5159,6 +5775,123 @@ public Builder clearDateStatistics() {
return dateStatisticsBuilder_;
}
+ // optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ private org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics binaryStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
+ private com.google.protobuf.SingleFieldBuilder<
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder> binaryStatisticsBuilder_;
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public boolean hasBinaryStatistics() {
+ return ((bitField0_ & 0x00000080) == 0x00000080);
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics getBinaryStatistics() {
+ if (binaryStatisticsBuilder_ == null) {
+ return binaryStatistics_;
+ } else {
+ return binaryStatisticsBuilder_.getMessage();
+ }
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public Builder setBinaryStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics value) {
+ if (binaryStatisticsBuilder_ == null) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ binaryStatistics_ = value;
+ onChanged();
+ } else {
+ binaryStatisticsBuilder_.setMessage(value);
+ }
+ bitField0_ |= 0x00000080;
+ return this;
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public Builder setBinaryStatistics(
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder builderForValue) {
+ if (binaryStatisticsBuilder_ == null) {
+ binaryStatistics_ = builderForValue.build();
+ onChanged();
+ } else {
+ binaryStatisticsBuilder_.setMessage(builderForValue.build());
+ }
+ bitField0_ |= 0x00000080;
+ return this;
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public Builder mergeBinaryStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics value) {
+ if (binaryStatisticsBuilder_ == null) {
+ if (((bitField0_ & 0x00000080) == 0x00000080) &&
+ binaryStatistics_ != org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance()) {
+ binaryStatistics_ =
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.newBuilder(binaryStatistics_).mergeFrom(value).buildPartial();
+ } else {
+ binaryStatistics_ = value;
+ }
+ onChanged();
+ } else {
+ binaryStatisticsBuilder_.mergeFrom(value);
+ }
+ bitField0_ |= 0x00000080;
+ return this;
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public Builder clearBinaryStatistics() {
+ if (binaryStatisticsBuilder_ == null) {
+ binaryStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
+ onChanged();
+ } else {
+ binaryStatisticsBuilder_.clear();
+ }
+ bitField0_ = (bitField0_ & ~0x00000080);
+ return this;
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder getBinaryStatisticsBuilder() {
+ bitField0_ |= 0x00000080;
+ onChanged();
+ return getBinaryStatisticsFieldBuilder().getBuilder();
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ public org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder getBinaryStatisticsOrBuilder() {
+ if (binaryStatisticsBuilder_ != null) {
+ return binaryStatisticsBuilder_.getMessageOrBuilder();
+ } else {
+ return binaryStatistics_;
+ }
+ }
+ /**
+ * optional .org.apache.hadoop.hive.ql.io.orc.BinaryStatistics binaryStatistics = 8;
+ */
+ private com.google.protobuf.SingleFieldBuilder<
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder>
+ getBinaryStatisticsFieldBuilder() {
+ if (binaryStatisticsBuilder_ == null) {
+ binaryStatisticsBuilder_ = new com.google.protobuf.SingleFieldBuilder<
+ org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatisticsOrBuilder>(
+ binaryStatistics_,
+ getParentForChildren(),
+ isClean());
+ binaryStatistics_ = null;
+ }
+ return binaryStatisticsBuilder_;
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.ColumnStatistics)
}
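Reviewer note (illustrative, not part of the generated code): the builder plumbing above lets a BINARY column's stripe statistics travel inside ColumnStatistics as the new optional field 8. A minimal sketch under the same assumption that OrcProto has been regenerated from this patch; the numbers are placeholders:

    // Assumes: import org.apache.hadoop.hive.ql.io.orc.OrcProto;

    // Hypothetical writer side for a BINARY column.
    OrcProto.BinaryStatistics binStats = OrcProto.BinaryStatistics.newBuilder()
        .setSum(987654L)                 // total binary blob length in the stripe (example)
        .build();

    OrcProto.ColumnStatistics colStats = OrcProto.ColumnStatistics.newBuilder()
        .setNumberOfValues(1000L)        // example value count
        .setBinaryStatistics(binStats)   // new optional field 8
        .build();

    // Hypothetical reader side, using the accessors added in this hunk.
    if (colStats.hasBinaryStatistics()) {
      long totalBlobLength = colStats.getBinaryStatistics().getSum();
    }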
@@ -14321,6 +15054,11 @@ public Builder setMagicBytes(
com.google.protobuf.GeneratedMessage.FieldAccessorTable
internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_fieldAccessorTable;
private static com.google.protobuf.Descriptors.Descriptor
+ internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_descriptor;
+ private static
+ com.google.protobuf.GeneratedMessage.FieldAccessorTable
+ internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_fieldAccessorTable;
+ private static com.google.protobuf.Descriptors.Descriptor
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor;
private static
com.google.protobuf.GeneratedMessage.FieldAccessorTable
@@ -14388,71 +15126,74 @@ public Builder setMagicBytes(
"e.ql.io.orc\"B\n\021IntegerStatistics\022\017\n\007mini" +
"mum\030\001 \001(\022\022\017\n\007maximum\030\002 \001(\022\022\013\n\003sum\030\003 \001(\022\"" +
"A\n\020DoubleStatistics\022\017\n\007minimum\030\001 \001(\001\022\017\n\007" +
- "maximum\030\002 \001(\001\022\013\n\003sum\030\003 \001(\001\"4\n\020StringStat" +
+ "maximum\030\002 \001(\001\022\013\n\003sum\030\003 \001(\001\"A\n\020StringStat" +
"istics\022\017\n\007minimum\030\001 \001(\t\022\017\n\007maximum\030\002 \001(\t" +
- "\"%\n\020BucketStatistics\022\021\n\005count\030\001 \003(\004B\002\020\001\"" +
- "B\n\021DecimalStatistics\022\017\n\007minimum\030\001 \001(\t\022\017\n" +
- "\007maximum\030\002 \001(\t\022\013\n\003sum\030\003 \001(\t\"2\n\016DateStati" +
- "stics\022\017\n\007minimum\030\001 \001(\021\022\017\n\007maximum\030\002 \001(\021\"",
- "\372\003\n\020ColumnStatistics\022\026\n\016numberOfValues\030\001" +
- " \001(\004\022J\n\rintStatistics\030\002 \001(\01323.org.apache" +
- ".hadoop.hive.ql.io.orc.IntegerStatistics" +
- "\022L\n\020doubleStatistics\030\003 \001(\01322.org.apache." +
- "hadoop.hive.ql.io.orc.DoubleStatistics\022L" +
- "\n\020stringStatistics\030\004 \001(\01322.org.apache.ha" +
- "doop.hive.ql.io.orc.StringStatistics\022L\n\020" +
- "bucketStatistics\030\005 \001(\01322.org.apache.hado" +
- "op.hive.ql.io.orc.BucketStatistics\022N\n\021de" +
- "cimalStatistics\030\006 \001(\01323.org.apache.hadoo",
- "p.hive.ql.io.orc.DecimalStatistics\022H\n\016da" +
- "teStatistics\030\007 \001(\01320.org.apache.hadoop.h" +
- "ive.ql.io.orc.DateStatistics\"n\n\rRowIndex" +
- "Entry\022\025\n\tpositions\030\001 \003(\004B\002\020\001\022F\n\nstatisti" +
- "cs\030\002 \001(\01322.org.apache.hadoop.hive.ql.io." +
- "orc.ColumnStatistics\"J\n\010RowIndex\022>\n\005entr" +
- "y\030\001 \003(\0132/.org.apache.hadoop.hive.ql.io.o" +
- "rc.RowIndexEntry\"\331\001\n\006Stream\022;\n\004kind\030\001 \002(" +
- "\0162-.org.apache.hadoop.hive.ql.io.orc.Str" +
- "eam.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006length\030\003 \001(\004",
- "\"r\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LENGT" +
- "H\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DICTIONARY_C" +
- "OUNT\020\004\022\r\n\tSECONDARY\020\005\022\r\n\tROW_INDEX\020\006\"\263\001\n" +
- "\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.org.apac" +
- "he.hadoop.hive.ql.io.orc.ColumnEncoding." +
- "Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind\022\n\n\006" +
- "DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V2\020\002\022" +
- "\021\n\rDICTIONARY_V2\020\003\"\214\001\n\014StripeFooter\0229\n\007s" +
- "treams\030\001 \003(\0132(.org.apache.hadoop.hive.ql" +
- ".io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org.ap",
- "ache.hadoop.hive.ql.io.orc.ColumnEncodin" +
- "g\"\314\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache.ha" +
- "doop.hive.ql.io.orc.Type.Kind\022\024\n\010subtype" +
- "s\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmaxim" +
- "umLength\030\004 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004" +
- "BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005" +
- "FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINAR" +
- "Y\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n" +
- "\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DAT" +
- "E\020\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n",
- "\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\nda" +
- "taLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014" +
- "numberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014" +
- "\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024" +
- "\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 " +
- "\001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.hadoop" +
- ".hive.ql.io.orc.StripeInformation\0225\n\005typ" +
- "es\030\004 \003(\0132&.org.apache.hadoop.hive.ql.io." +
- "orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apache." +
- "hadoop.hive.ql.io.orc.UserMetadataItem\022\024",
- "\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\013" +
- "22.org.apache.hadoop.hive.ql.io.orc.Colu" +
- "mnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n" +
- "\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013com" +
- "pression\030\002 \001(\01621.org.apache.hadoop.hive." +
- "ql.io.orc.CompressionKind\022\034\n\024compression" +
- "BlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005" +
- "magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020" +
- "\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+ "\022\013\n\003sum\030\003 \001(\022\"%\n\020BucketStatistics\022\021\n\005cou" +
+ "nt\030\001 \003(\004B\002\020\001\"B\n\021DecimalStatistics\022\017\n\007min" +
+ "imum\030\001 \001(\t\022\017\n\007maximum\030\002 \001(\t\022\013\n\003sum\030\003 \001(\t" +
+ "\"2\n\016DateStatistics\022\017\n\007minimum\030\001 \001(\021\022\017\n\007m",
+ "aximum\030\002 \001(\021\"\037\n\020BinaryStatistics\022\013\n\003sum\030" +
+ "\001 \001(\022\"\310\004\n\020ColumnStatistics\022\026\n\016numberOfVa" +
+ "lues\030\001 \001(\004\022J\n\rintStatistics\030\002 \001(\01323.org." +
+ "apache.hadoop.hive.ql.io.orc.IntegerStat" +
+ "istics\022L\n\020doubleStatistics\030\003 \001(\01322.org.a" +
+ "pache.hadoop.hive.ql.io.orc.DoubleStatis" +
+ "tics\022L\n\020stringStatistics\030\004 \001(\01322.org.apa" +
+ "che.hadoop.hive.ql.io.orc.StringStatisti" +
+ "cs\022L\n\020bucketStatistics\030\005 \001(\01322.org.apach" +
+ "e.hadoop.hive.ql.io.orc.BucketStatistics",
+ "\022N\n\021decimalStatistics\030\006 \001(\01323.org.apache" +
+ ".hadoop.hive.ql.io.orc.DecimalStatistics" +
+ "\022H\n\016dateStatistics\030\007 \001(\01320.org.apache.ha" +
+ "doop.hive.ql.io.orc.DateStatistics\022L\n\020bi" +
+ "naryStatistics\030\010 \001(\01322.org.apache.hadoop" +
+ ".hive.ql.io.orc.BinaryStatistics\"n\n\rRowI" +
+ "ndexEntry\022\025\n\tpositions\030\001 \003(\004B\002\020\001\022F\n\nstat" +
+ "istics\030\002 \001(\01322.org.apache.hadoop.hive.ql" +
+ ".io.orc.ColumnStatistics\"J\n\010RowIndex\022>\n\005" +
+ "entry\030\001 \003(\0132/.org.apache.hadoop.hive.ql.",
+ "io.orc.RowIndexEntry\"\331\001\n\006Stream\022;\n\004kind\030" +
+ "\001 \002(\0162-.org.apache.hadoop.hive.ql.io.orc" +
+ ".Stream.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006length\030\003" +
+ " \001(\004\"r\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006L" +
+ "ENGTH\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DICTIONA" +
+ "RY_COUNT\020\004\022\r\n\tSECONDARY\020\005\022\r\n\tROW_INDEX\020\006" +
+ "\"\263\001\n\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.org." +
+ "apache.hadoop.hive.ql.io.orc.ColumnEncod" +
+ "ing.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind" +
+ "\022\n\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V",
+ "2\020\002\022\021\n\rDICTIONARY_V2\020\003\"\214\001\n\014StripeFooter\022" +
+ "9\n\007streams\030\001 \003(\0132(.org.apache.hadoop.hiv" +
+ "e.ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.or" +
+ "g.apache.hadoop.hive.ql.io.orc.ColumnEnc" +
+ "oding\"\314\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
+ "e.hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010sub" +
+ "types\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rm" +
+ "aximumLength\030\004 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000" +
+ "\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004" +
+ "\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006B",
+ "INARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020" +
+ "\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n" +
+ "\004DATE\020\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformatio" +
+ "n\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022" +
+ "\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004" +
+ "\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataIt" +
+ "em\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Foot" +
+ "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
+ "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
+ "doop.hive.ql.io.orc.StripeInformation\0225\n",
+ "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql" +
+ ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
+ "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
+ "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007" +
+ " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
+ "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
+ "\"\255\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
+ "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
+ "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
+ "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001",
+ "\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004N" +
+ "ONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -14476,7 +15217,7 @@ public Builder setMagicBytes(
internal_static_org_apache_hadoop_hive_ql_io_orc_StringStatistics_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_StringStatistics_descriptor,
- new java.lang.String[] { "Minimum", "Maximum", });
+ new java.lang.String[] { "Minimum", "Maximum", "Sum", });
internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_descriptor =
getDescriptor().getMessageTypes().get(3);
internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_fieldAccessorTable = new
@@ -14495,68 +15236,74 @@ public Builder setMagicBytes(
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_DateStatistics_descriptor,
new java.lang.String[] { "Minimum", "Maximum", });
- internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor =
+ internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_descriptor =
getDescriptor().getMessageTypes().get(6);
+ internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_fieldAccessorTable = new
+ com.google.protobuf.GeneratedMessage.FieldAccessorTable(
+ internal_static_org_apache_hadoop_hive_ql_io_orc_BinaryStatistics_descriptor,
+ new java.lang.String[] { "Sum", });
+ internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor =
+ getDescriptor().getMessageTypes().get(7);
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor,
- new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", "DateStatistics", });
+ new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", "DateStatistics", "BinaryStatistics", });
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_descriptor =
- getDescriptor().getMessageTypes().get(7);
+ getDescriptor().getMessageTypes().get(8);
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_descriptor,
new java.lang.String[] { "Positions", "Statistics", });
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_descriptor =
- getDescriptor().getMessageTypes().get(8);
+ getDescriptor().getMessageTypes().get(9);
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_descriptor,
new java.lang.String[] { "Entry", });
internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor =
- getDescriptor().getMessageTypes().get(9);
+ getDescriptor().getMessageTypes().get(10);
internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor,
new java.lang.String[] { "Kind", "Column", "Length", });
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_descriptor =
- getDescriptor().getMessageTypes().get(10);
+ getDescriptor().getMessageTypes().get(11);
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_descriptor,
new java.lang.String[] { "Kind", "DictionarySize", });
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_descriptor =
- getDescriptor().getMessageTypes().get(11);
+ getDescriptor().getMessageTypes().get(12);
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_descriptor,
new java.lang.String[] { "Streams", "Columns", });
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor =
- getDescriptor().getMessageTypes().get(12);
+ getDescriptor().getMessageTypes().get(13);
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor,
new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", });
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor =
- getDescriptor().getMessageTypes().get(13);
+ getDescriptor().getMessageTypes().get(14);
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor,
new java.lang.String[] { "Offset", "IndexLength", "DataLength", "FooterLength", "NumberOfRows", });
internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_descriptor =
- getDescriptor().getMessageTypes().get(14);
+ getDescriptor().getMessageTypes().get(15);
internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_descriptor,
new java.lang.String[] { "Name", "Value", });
internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_descriptor =
- getDescriptor().getMessageTypes().get(15);
+ getDescriptor().getMessageTypes().get(16);
internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_descriptor,
new java.lang.String[] { "HeaderLength", "ContentLength", "Stripes", "Types", "Metadata", "NumberOfRows", "Statistics", "RowIndexStride", });
internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor =
- getDescriptor().getMessageTypes().get(16);
+ getDescriptor().getMessageTypes().get(17);
internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor,
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
new file mode 100644
index 0000000..23030a3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Statistics for binary columns.
+ */
+public interface BinaryColumnStatistics extends ColumnStatistics {
+ long getSum();
+}
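For context, a minimal sketch of how a caller might consume the new interface once a file has been written. It uses only APIs exercised by the tests later in this patch (OrcFile.createReader, Reader.getStatistics, and the getSum accessors); the class name and the instanceof dispatch are illustrative, not part of the patch.

package org.apache.hadoop.hive.ql.io.orc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative helper (not part of this patch): prints the length sums
// recorded in binary and string column statistics of an ORC file.
public class PrintColumnSums {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(fs, new Path(args[0]));
    ColumnStatistics[] stats = reader.getStatistics();
    for (int i = 0; i < stats.length; i++) {
      if (stats[i] instanceof BinaryColumnStatistics) {
        // total length of all binary blobs in the column
        System.out.println("column " + i + " binary sum: "
            + ((BinaryColumnStatistics) stats[i]).getSum());
      } else if (stats[i] instanceof StringColumnStatistics) {
        // total length of all strings in the column
        System.out.println("column " + i + " string sum: "
            + ((StringColumnStatistics) stats[i]).getSum());
      }
    }
  }
}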
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index 6268617..42d897c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -21,6 +21,7 @@
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
class ColumnStatisticsImpl implements ColumnStatistics {
@@ -332,10 +333,11 @@ public String toString() {
}
}
- private static final class StringStatisticsImpl extends ColumnStatisticsImpl
+ protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
implements StringColumnStatistics {
private String minimum = null;
private String maximum = null;
+ private long sum = 0;
StringStatisticsImpl() {
}
@@ -349,6 +351,9 @@ public String toString() {
if (str.hasMinimum()) {
minimum = str.getMinimum();
}
+ if(str.hasSum()) {
+ sum = str.getSum();
+ }
}
@Override
@@ -356,6 +361,7 @@ void reset() {
super.reset();
minimum = null;
maximum = null;
+ sum = 0;
}
@Override
@@ -368,6 +374,7 @@ void updateString(String value) {
} else if (maximum.compareTo(value) < 0) {
maximum = value;
}
+ sum += value.length();
}
@Override
@@ -384,6 +391,7 @@ void merge(ColumnStatisticsImpl other) {
maximum = str.maximum;
}
}
+ sum += str.sum;
}
@Override
@@ -394,6 +402,7 @@ void merge(ColumnStatisticsImpl other) {
if (getNumberOfValues() != 0) {
str.setMinimum(minimum);
str.setMaximum(maximum);
+ str.setSum(sum);
}
result.setStringStatistics(str);
return result;
@@ -410,6 +419,11 @@ public String getMaximum() {
}
@Override
+ public long getSum() {
+ return sum;
+ }
+
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
@@ -417,6 +431,67 @@ public String toString() {
buf.append(minimum);
buf.append(" max: ");
buf.append(maximum);
+ buf.append(" sum: ");
+ buf.append(sum);
+ }
+ return buf.toString();
+ }
+ }
+
+ protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
+ BinaryColumnStatistics {
+
+ private long sum = 0;
+
+ BinaryStatisticsImpl() {
+ }
+
+ BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
+ if (binStats.hasSum()) {
+ sum = binStats.getSum();
+ }
+ }
+
+ @Override
+ void reset() {
+ super.reset();
+ sum = 0;
+ }
+
+ @Override
+ void updateBinary(BytesWritable value) {
+ sum += value.getLength();
+ }
+
+ @Override
+ void merge(ColumnStatisticsImpl other) {
+ super.merge(other);
+ BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+ sum += bin.sum;
+ }
+
+ @Override
+ public long getSum() {
+ return sum;
+ }
+
+ @Override
+ OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
+ bin.setSum(sum);
+ result.setBinaryStatistics(bin);
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" sum: ");
+ buf.append(sum);
}
return buf.toString();
}
@@ -666,6 +741,10 @@ void updateString(String value) {
throw new UnsupportedOperationException("Can't update string");
}
+ void updateBinary(BytesWritable value) {
+ throw new UnsupportedOperationException("Can't update binary");
+ }
+
void updateDecimal(HiveDecimal value) {
throw new UnsupportedOperationException("Can't update decimal");
}
@@ -720,6 +799,8 @@ static ColumnStatisticsImpl create(ObjectInspector inspector) {
return new DecimalStatisticsImpl();
case DATE:
return new DateStatisticsImpl();
+ case BINARY:
+ return new BinaryStatisticsImpl();
default:
return new ColumnStatisticsImpl();
}
@@ -741,6 +822,8 @@ static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
return new DecimalStatisticsImpl(stats);
} else if (stats.hasDateStatistics()) {
return new DateStatisticsImpl(stats);
+ } else if(stats.hasBinaryStatistics()) {
+ return new BinaryStatisticsImpl(stats);
} else {
return new ColumnStatisticsImpl(stats);
}
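The sum bookkeeping added above follows the same pattern as the existing min/max tracking: the update methods add the current value's length, merge adds another partition's running total, and reset clears the counter. A simplified, standalone mirror of that behaviour is sketched below (it is not the real ColumnStatisticsImpl, whose methods are package-private); the lengths match the SimpleStruct string rows used in the tests later in this patch.

// Simplified, self-contained mirror of the new sum bookkeeping; the real
// implementation lives in StringStatisticsImpl/BinaryStatisticsImpl above.
public class LengthSumSketch {
  private long sum = 0;

  void update(int length) { sum += length; }               // like updateString/updateBinary
  void merge(LengthSumSketch other) { sum += other.sum; }  // like merge()
  void reset() { sum = 0; }                                // like reset()
  long getSum() { return sum; }

  public static void main(String[] args) {
    LengthSumSketch strings = new LengthSumSketch();
    strings.update("foo".length());   // 3
    strings.update("bar".length());   // 3
    LengthSumSketch more = new LengthSumSketch();
    more.update("hi".length());       // 2
    strings.merge(more);
    System.out.println(strings.getSum());  // 8, matching "sum: 8" in the tests below
  }
}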
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index 6f8ca73..62e7b34 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -22,6 +22,7 @@
import org.apache.hadoop.hive.ql.io.FSRecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
+import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.NullWritable;
@@ -44,14 +45,17 @@
private static class OrcRecordWriter
implements RecordWriter,
- FSRecordWriter {
+ FSRecordWriter,
+ FSRecordWriter.StatsProvidingRecordWriter {
private Writer writer = null;
private final Path path;
private final OrcFile.WriterOptions options;
+ private final SerDeStats stats;
OrcRecordWriter(Path path, OrcFile.WriterOptions options) {
this.path = path;
this.options = options;
+ this.stats = new SerDeStats();
}
@Override
@@ -93,6 +97,13 @@ public void close(boolean b) throws IOException {
}
writer.close();
}
+
+ @Override
+ public SerDeStats getStats() {
+ stats.setRawDataSize(writer.getRawDataSize());
+ stats.setRowCount(writer.getNumberOfRows());
+ return stats;
+ }
}
@Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index e034ca0..2f3d735 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -18,23 +18,29 @@
package org.apache.hadoop.hive.ql.io.orc;
-import com.google.protobuf.CodedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.StringUtils;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import com.google.protobuf.CodedInputStream;
final class ReaderImpl implements Reader {
@@ -49,6 +55,7 @@
private final int bufferSize;
private final OrcProto.Footer footer;
private final ObjectInspector inspector;
+ private long deserializedSize = -1;
private static class StripeInformationImpl
implements StripeInformation {
@@ -345,12 +352,122 @@ public RecordReader rows(long offset, long length, boolean[] include,
@Override
public long getRawDataSize() {
+ // if deserializedSize has not been computed yet, compute it; otherwise
+ // return the cached value. Since the size is derived from the file footer,
+ // it does not have to be recomputed on subsequent calls.
+ if (deserializedSize == -1) {
+ List<OrcProto.ColumnStatistics> stats = footer.getStatisticsList();
+ List<Integer> indices = Lists.newArrayList();
+ for (int i = 0; i < stats.size(); ++i) {
+ indices.add(i);
+ }
+ deserializedSize = getRawDataSizeFromColIndices(indices);
+ }
+ return deserializedSize;
+ }
+
+ private long getRawDataSizeFromColIndices(List<Integer> colIndices) {
+ long result = 0;
+ for (int colIdx : colIndices) {
+ result += getRawDataSizeOfColumn(colIdx);
+ }
+ return result;
+ }
+
+ private long getRawDataSizeOfColumn(int colIdx) {
+ OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
+ long numVals = colStat.getNumberOfValues();
+ Type type = footer.getTypes(colIdx);
+
+ switch (type.getKind()) {
+ case BINARY:
+ // the old ORC format does not write binary statistics; an explicit presence
+ // check is not required because the protocol buffer default of zero is returned.
+ return colStat.getBinaryStatistics().getSum();
+ case STRING:
+ // the old ORC format does not write a sum for string statistics; an explicit
+ // presence check is not required because the protocol buffer default is zero.
+
+ // ORC strings are deserialized to Java strings, so use the Java data
+ // model's string size
+ numVals = numVals == 0 ? 1 : numVals;
+ int avgStrLen = (int) (colStat.getStringStatistics().getSum() / numVals);
+ return numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen);
+ case TIMESTAMP:
+ return numVals * JavaDataModel.get().lengthOfTimestamp();
+ case DATE:
+ return numVals * JavaDataModel.get().lengthOfDate();
+ case DECIMAL:
+ return numVals * JavaDataModel.get().lengthOfDecimal();
+ case DOUBLE:
+ case LONG:
+ return numVals * JavaDataModel.get().primitive2();
+ case FLOAT:
+ case INT:
+ case SHORT:
+ case BOOLEAN:
+ case BYTE:
+ return numVals * JavaDataModel.get().primitive1();
+ default:
+ LOG.debug("Unknown primitive category.");
+ break;
+ }
+
return 0;
}
@Override
public long getRawDataSizeOfColumns(List<String> colNames) {
- return 0;
+ List<Integer> colIndices = getColumnIndicesFromNames(colNames);
+ return getRawDataSizeFromColIndices(colIndices);
+ }
+
+ private List<Integer> getColumnIndicesFromNames(List<String> colNames) {
+ // top level struct
+ Type type = footer.getTypesList().get(0);
+ List<Integer> colIndices = Lists.newArrayList();
+ List<String> fieldNames = type.getFieldNamesList();
+ int fieldIdx = 0;
+ for (String colName : colNames) {
+ if (fieldNames.contains(colName)) {
+ fieldIdx = fieldNames.indexOf(colName);
+ }
+
+ // a single field may span multiple columns. find start and end column
+ // index for the requested field
+ int idxStart = type.getSubtypes(fieldIdx);
+
+ int idxEnd = 0;
+
+ // if the specified field is the last field, then the end index will be
+ // the last column index
+ if (fieldIdx + 1 > fieldNames.size() - 1) {
+ idxEnd = getLastIdx() + 1;
+ } else {
+ idxEnd = type.getSubtypes(fieldIdx + 1);
+ }
+
+ // if the start index and end index are the same, the field is a primitive
+ // field; otherwise it is a complex field (map, list, struct, union)
+ if (idxStart == idxEnd) {
+ // simple field
+ colIndices.add(idxStart);
+ } else {
+ // a complex field spans multiple columns
+ for (int i = idxStart; i < idxEnd; i++) {
+ colIndices.add(i);
+ }
+ }
+ }
+ return colIndices;
+ }
+
+ private int getLastIdx() {
+ Set<Integer> indices = Sets.newHashSet();
+ for (Type type : footer.getTypesList()) {
+ indices.addAll(type.getSubtypesList());
+ }
+ return Collections.max(indices);
}
}
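To make the STRING branch above concrete, the sketch below redoes the estimate for the string1 column written by testStringAndBinaryStatistics later in this patch (3 non-null values with a total length of 8). The class is illustrative; only JavaDataModel.get() and lengthForStringOfLength() from this patch are used, and the resulting byte count depends on the JVM data model in effect.

import org.apache.hadoop.hive.ql.util.JavaDataModel;

// Stand-alone illustration of the STRING sizing above: average the stored
// length sum over the value count, then charge a Java String of that
// average length per value.
public class StringColumnSizeSketch {
  public static void main(String[] args) {
    long numVals = 3;   // non-null strings in the column
    long sum = 8;       // StringStatistics sum (total string length)
    numVals = numVals == 0 ? 1 : numVals;     // avoid division by zero
    int avgStrLen = (int) (sum / numVals);    // integer average, 2 here
    long estimate = numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen);
    System.out.println("estimated raw data size: " + estimate);
  }
}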
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
index 72e779a..3a49269 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
@@ -32,4 +32,10 @@
* @return the maximum
*/
String getMaximum();
+
+ /**
+ * Get the total length of all strings
+ * @return the sum (total length)
+ */
+ long getSum();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index c0b55ce..628efab 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -27,19 +27,17 @@
import java.util.Map;
import java.util.TreeMap;
-
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -66,6 +64,9 @@
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
+
/**
* An ORC file writer. The file is divided into stripes, which is the natural
* unit of work when reading. Each stripe is buffered in memory until the
@@ -111,6 +112,7 @@
private int columnCount;
private long rowCount = 0;
private long rowsInStripe = 0;
+ private long rawDataSize = 0;
private int rowsInIndex = 0;
private final List<OrcProto.StripeInformation> stripes =
new ArrayList<OrcProto.StripeInformation>();
@@ -1085,6 +1087,7 @@ void write(Object obj) throws IOException {
((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
stream.write(val.getBytes(), 0, val.getLength());
length.write(val.getLength());
+ indexStatistics.updateBinary(val);
}
}
@@ -1760,6 +1763,74 @@ private void flushStripe() throws IOException {
}
}
+ private long computeRawDataSize() {
+ long result = 0;
+ for (TreeWriter child : treeWriter.getChildrenWriters()) {
+ result += getRawDataSizeFromInspectors(child, child.inspector);
+ }
+ return result;
+ }
+
+ private long getRawDataSizeFromInspectors(TreeWriter child, ObjectInspector oi) {
+ long total = 0;
+ switch (oi.getCategory()) {
+ case PRIMITIVE:
+ total += getRawDataSizeFromPrimitives(child, oi);
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ case STRUCT:
+ for (TreeWriter tw : child.childrenWriters) {
+ total += getRawDataSizeFromInspectors(tw, tw.inspector);
+ }
+ break;
+ default:
+ LOG.debug("Unknown object inspector category.");
+ break;
+ }
+ return total;
+ }
+
+ private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) {
+ long result = 0;
+ long numVals = child.fileStatistics.getNumberOfValues();
+ switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case FLOAT:
+ return numVals * JavaDataModel.get().primitive1();
+ case LONG:
+ case DOUBLE:
+ return numVals * JavaDataModel.get().primitive2();
+ case STRING:
+ // ORC strings are converted to Java Strings, so use JavaDataModel to
+ // compute the overall size of the strings
+ child = (StringTreeWriter) child;
+ StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
+ numVals = numVals == 0 ? 1 : numVals;
+ int avgStringLen = (int) (scs.getSum() / numVals);
+ return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
+ case DECIMAL:
+ return numVals * JavaDataModel.get().lengthOfDecimal();
+ case DATE:
+ return numVals * JavaDataModel.get().lengthOfDate();
+ case BINARY:
+ // get total length of binary blob
+ BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
+ return bcs.getSum();
+ case TIMESTAMP:
+ return numVals * JavaDataModel.get().lengthOfTimestamp();
+ default:
+ LOG.debug("Unknown primitive category.");
+ break;
+ }
+
+ return result;
+ }
+
private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
switch (kind) {
case NONE: return OrcProto.CompressionKind.NONE;
@@ -1786,6 +1857,8 @@ private int writeFooter(long bodyLength) throws IOException {
builder.setHeaderLength(headerLength);
builder.setNumberOfRows(rowCount);
builder.setRowIndexStride(rowIndexStride);
+ // populate raw data size
+ rawDataSize = computeRawDataSize();
// serialize the types
writeTypes(builder, treeWriter);
// add the stripe information
@@ -1872,13 +1945,21 @@ public void close() throws IOException {
}
}
+ /**
+ * Raw data size is computed when writing the file footer. Hence the raw data
+ * size value is available only after closing the writer.
+ */
@Override
public long getRawDataSize() {
- return 0;
+ return rawDataSize;
}
+ /**
+ * Row count is updated when flushing the stripes. To get an accurate row
+ * count, call this method after the writer has been closed.
+ */
@Override
public long getNumberOfRows() {
- return 0;
+ return rowCount;
}
}
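A sketch of the writer-side usage implied by the javadoc above: the stats are only meaningful once close() has flushed the remaining stripe and written the footer. The helper class is illustrative; the inspector and rows are assumed to be supplied by the caller, exactly as the tests added by this patch do.

package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

// Sketch (not part of the patch): write the supplied rows and report the
// writer-side stats after close().
class WriterStatsSketch {
  static long[] writeAndGetStats(Path path, Configuration conf,
      ObjectInspector inspector, Iterable<Object> rows) throws IOException {
    Writer writer = OrcFile.createWriter(path,
        OrcFile.writerOptions(conf)
            .inspector(inspector)
            .stripeSize(100000)
            .bufferSize(10000));
    for (Object row : rows) {
      writer.addRow(row);
    }
    writer.close();
    // rowCount is accumulated while flushing stripes; rawDataSize is
    // computed while writing the footer, so both are valid only here.
    return new long[] { writer.getNumberOfRows(), writer.getRawDataSize() };
  }
}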
diff --git ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
index e3eec02..7d34167 100644
--- ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
+++ ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
@@ -119,7 +119,7 @@ public int arrayList() {
// ascii string
public int lengthFor(String string) {
- return object() + primitive1() * 3 + array() + string.length();
+ return lengthForStringOfLength(string.length());
}
public int lengthFor(NumericHistogram histogram) {
@@ -200,4 +200,33 @@ public static int round(int size) {
}
return ((size + 8) >> 3) << 3;
}
+
+ public int lengthOfDecimal() {
+ // object overhead + 8 bytes for intCompact + 4 bytes for precision
+ // + 4 bytes for scale + size of BigInteger
+ return object() + 2 * primitive2() + lengthOfBigInteger();
+ }
+
+ private int lengthOfBigInteger() {
+ // object overhead + 4 bytes for bitCount + 4 bytes for bitLength
+ // + 4 bytes for firstNonzeroByteNum + 4 bytes for firstNonzeroIntNum +
+ // + 4 bytes for lowestSetBit + 5 bytes for size of magnitude (since max precision
+ // is only 38 for HiveDecimal) + 7 bytes of padding (since java memory allocations
+ // are 8 byte aligned)
+ return object() + 4 * primitive2();
+ }
+
+ public int lengthOfTimestamp() {
+ // object overhead + 4 bytes for int (nanos) + 4 bytes of padding
+ return object() + primitive2();
+ }
+
+ public int lengthOfDate() {
+ // object overhead + 8 bytes for long (fastTime) + 16 bytes for cdate
+ return object() + 3 * primitive2();
+ }
+
+ public int lengthForStringOfLength(int strLen) {
+ return object() + primitive1() * 3 + array() + strLen;
+ }
}
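Because lengthFor(String) now delegates to the new lengthForStringOfLength(int), sizing a concrete string and sizing "a string of the same length" agree by construction, which is what lets the reader and writer estimate string columns from the statistics sum alone. A small illustrative check:

import org.apache.hadoop.hive.ql.util.JavaDataModel;

// Illustration: the refactored lengthFor(String) and the new
// lengthForStringOfLength(int) share the same formula.
public class JavaDataModelCheck {
  public static void main(String[] args) {
    JavaDataModel model = JavaDataModel.get();
    String s = "hi";
    int direct = model.lengthFor(s);
    int byLength = model.lengthForStringOfLength(s.length());
    System.out.println(direct == byLength);  // prints true
  }
}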
diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index edbf822..cee08fd 100644
--- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -15,6 +15,8 @@ message DoubleStatistics {
message StringStatistics {
optional string minimum = 1;
optional string maximum = 2;
+ // sum will store the total length of all strings in a stripe
+ optional sint64 sum = 3;
}
message BucketStatistics {
@@ -33,6 +35,11 @@ message DateStatistics {
optional sint32 maximum = 2;
}
+message BinaryStatistics {
+ // sum will store the total binary blob length in a stripe
+ optional sint64 sum = 1;
+}
+
message ColumnStatistics {
optional uint64 numberOfValues = 1;
optional IntegerStatistics intStatistics = 2;
@@ -41,6 +48,7 @@ message ColumnStatistics {
optional BucketStatistics bucketStatistics = 5;
optional DecimalStatistics decimalStatistics = 6;
optional DateStatistics dateStatistics = 7;
+ optional BinaryStatistics binaryStatistics = 8;
}
message RowIndexEntry {
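The regenerated Java classes earlier in this patch expose the new optional fields through the usual protobuf accessors; a file written before this change simply leaves them unset, so hasSum() is false and getSum() returns the default 0. A small round-trip sketch (the class name is illustrative):

import org.apache.hadoop.hive.ql.io.orc.OrcProto;

// Round-trip of the new optional sum field through the generated classes.
public class SumFieldRoundTrip {
  public static void main(String[] args) {
    OrcProto.BinaryStatistics bin = OrcProto.BinaryStatistics.newBuilder()
        .setSum(15)   // total binary blob length, as in the tests below
        .build();
    OrcProto.ColumnStatistics col = OrcProto.ColumnStatistics.newBuilder()
        .setNumberOfValues(3)
        .setBinaryStatistics(bin)
        .build();
    System.out.println(col.getBinaryStatistics().hasSum());  // true
    System.out.println(col.getBinaryStatistics().getSum());  // 15
  }
}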
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index e6569f4..1dfcb56 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -18,12 +18,28 @@
package org.apache.hadoop.hive.ql.io.orc;
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNotNull;
+import static junit.framework.Assert.assertNull;
+import static junit.framework.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -55,26 +71,25 @@
import org.junit.Test;
import org.junit.rules.TestName;
-import java.io.File;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import static junit.framework.Assert.*;
-import static junit.framework.Assert.assertEquals;
-
/**
* Tests for the top level reader/streamFactory of ORC files.
*/
public class TestOrcFile {
+ public static class SimpleStruct {
+ BytesWritable bytes1;
+ Text string1;
+
+ SimpleStruct(BytesWritable b1, String s1) {
+ this.bytes1 = b1;
+ if(s1 == null) {
+ this.string1 = null;
+ } else {
+ this.string1 = new Text(s1);
+ }
+ }
+ }
+
public static class InnerStruct {
int int1;
Text string1 = new Text();
@@ -132,48 +147,6 @@
}
}
- public static class AllTypesRow {
- Boolean boolean1;
- Byte byte1;
- Short short1;
- Integer int1;
- Long long1;
- Float float1;
- Double double1;
- BytesWritable bytes1;
- Text string1;
- MiddleStruct middle;
- List<InnerStruct> list = new ArrayList<InnerStruct>();
- Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
- Timestamp ts;
- HiveDecimal decimal1;
-
- AllTypesRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
- Double d1,
- BytesWritable b3, String s2, MiddleStruct m1,
- List<InnerStruct> l2, Map<Text, InnerStruct> m2,
- Timestamp ts1, HiveDecimal decimal) {
- this.boolean1 = b1;
- this.byte1 = b2;
- this.short1 = s1;
- this.int1 = i1;
- this.long1 = l1;
- this.float1 = f1;
- this.double1 = d1;
- this.bytes1 = b3;
- if (s2 == null) {
- this.string1 = null;
- } else {
- this.string1 = new Text(s2);
- }
- this.middle = m1;
- this.list = l2;
- this.map = m2;
- this.ts = ts1;
- this.decimal1 = decimal;
- }
- }
-
private static InnerStruct inner(int i, String s) {
return new InnerStruct(i, s);
}
@@ -231,39 +204,6 @@ public void openFileSystem () throws Exception {
}
@Test
- public void testWriteFormat_0_11() throws Exception {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory
- .getReflectionObjectInspector(AllTypesRow.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- conf.set("hive.exec.orc.write.format", "0.11");
- Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
- 100000, CompressionKind.NONE, 10000, 10000);
- for(int i = 0; i < 7500; i++) {
- if (i % 2 == 0) {
- writer.addRow(new AllTypesRow(false, (byte) 1, (short) 1024, 65536,
- Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
- new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(
- inner(3, "good"), inner(4, "bad")), map(), Timestamp
- .valueOf("2000-03-12 15:00:00"), new HiveDecimal(
- "12345678.6547456")));
- } else {
- writer.addRow(new AllTypesRow(true, (byte) 100, (short) 2048, 65536,
- Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
- new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(
- inner(100000000, "cat"), inner(-100000, "in"),
- inner(1234, "hat")),
- map(inner(5, "chani"), inner(1, "mauddib")), Timestamp
- .valueOf("2000-03-12 15:00:01"), new HiveDecimal(
- "12345678.6547457")));
- }
- }
- writer.close();
- }
-
- @Test
public void testReadFormat_0_11() throws Exception {
Path resourceDir = new Path(System.getProperty("test.build.resources", "ql"
+ File.separator + "src" + File.separator + "test" + File.separator
@@ -319,7 +259,7 @@ public void testReadFormat_0_11() throws Exception {
assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
stats[7].toString());
- assertEquals("count: 7500 min: bye max: hi", stats[9].toString());
+ assertEquals("count: 7500 min: bye max: hi sum: 0", stats[9].toString());
// check the inspectors
StructObjectInspector readerInspector = (StructObjectInspector) reader
@@ -515,6 +455,93 @@ public void testReadFormat_0_11() throws Exception {
}
@Test
+ public void testStringAndBinaryStatistics() throws Exception {
+
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .bufferSize(10000));
+ writer.addRow(new SimpleStruct(bytes(0,1,2,3,4), "foo"));
+ writer.addRow(new SimpleStruct(bytes(0,1,2,3), "bar"));
+ writer.addRow(new SimpleStruct(bytes(0,1,2,3,4,5), null));
+ writer.addRow(new SimpleStruct(null, "hi"));
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+
+ // check the stats
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(4, stats[0].getNumberOfValues());
+ assertEquals("count: 4", stats[0].toString());
+
+ assertEquals(3, stats[1].getNumberOfValues());
+ assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
+ assertEquals("count: 3 sum: 15", stats[1].toString());
+
+ assertEquals(3, stats[2].getNumberOfValues());
+ assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
+ assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
+ assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
+ assertEquals("count: 3 min: bar max: hi sum: 8",
+ stats[2].toString());
+
+ // check the inspectors
+ StructObjectInspector readerInspector =
+ (StructObjectInspector) reader.getObjectInspector();
+ assertEquals(ObjectInspector.Category.STRUCT,
+ readerInspector.getCategory());
+ assertEquals("struct",
+ readerInspector.getTypeName());
+ List extends StructField> fields =
+ readerInspector.getAllStructFieldRefs();
+ BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+ getStructFieldRef("bytes1").getFieldObjectInspector();
+ StringObjectInspector st = (StringObjectInspector) readerInspector.
+ getStructFieldRef("string1").getFieldObjectInspector();
+ RecordReader rows = reader.rows(null);
+ Object row = rows.next(null);
+ assertNotNull(row);
+ // check the contents of the first row
+ assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("foo", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // check the contents of second row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(bytes(0,1,2,3), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("bar", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // check the contents of the third row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(bytes(0,1,2,3,4,5), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertNull(st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // check the contents of the fourth row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertNull(bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // handle the close up
+ assertEquals(false, rows.hasNext());
+ rows.close();
+ }
+
+ @Test
public void test1() throws Exception {
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
@@ -567,7 +594,7 @@ public void test1() throws Exception {
assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
stats[7].toString());
- assertEquals("count: 2 min: bye max: hi", stats[9].toString());
+ assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
// check the inspectors
StructObjectInspector readerInspector =
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
index b93db84..492bb00 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
@@ -125,7 +125,7 @@ public void testMultiStripeWithNull() throws Exception {
assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals(19998,
((StringColumnStatistics) stats[2]).getNumberOfValues());
- assertEquals("count: 19998 min: a max: a",
+ assertEquals("count: 19998 min: a max: a sum: 19998",
stats[2].toString());
// check the inspectors
@@ -229,7 +229,7 @@ public void testMultiStripeWithoutNull() throws Exception {
assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals(20000,
((StringColumnStatistics) stats[2]).getNumberOfValues());
- assertEquals("count: 20000 min: a max: b",
+ assertEquals("count: 20000 min: a max: b sum: 20000",
stats[2].toString());
// check the inspectors
@@ -329,7 +329,7 @@ public void testColumnsWithNullAndCompression() throws Exception {
assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals(7, ((StringColumnStatistics) stats[2]).getNumberOfValues());
- assertEquals("count: 7 min: a max: h",
+ assertEquals("count: 7 min: a max: h sum: 7",
stats[2].toString());
// check the inspectors
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
new file mode 100644
index 0000000..d1ee561
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
@@ -0,0 +1,643 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNotNull;
+import static junit.framework.Assert.assertNull;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import com.google.common.collect.Lists;
+
+public class TestOrcSerDeStats {
+
+ public static class ListStruct {
+ List<String> list1;
+
+ public ListStruct(List<String> l1) {
+ this.list1 = l1;
+ }
+ }
+
+ public static class MapStruct {
+ Map<String, Double> map1;
+
+ public MapStruct(Map<String, Double> m1) {
+ this.map1 = m1;
+ }
+ }
+
+ public static class SimpleStruct {
+ BytesWritable bytes1;
+ Text string1;
+
+ SimpleStruct(BytesWritable b1, String s1) {
+ this.bytes1 = b1;
+ if (s1 == null) {
+ this.string1 = null;
+ } else {
+ this.string1 = new Text(s1);
+ }
+ }
+ }
+
+ public static class InnerStruct {
+ int int1;
+ Text string1 = new Text();
+
+ InnerStruct(int int1, String string1) {
+ this.int1 = int1;
+ this.string1.set(string1);
+ }
+ }
+
+ public static class MiddleStruct {
+ List<InnerStruct> list = new ArrayList<InnerStruct>();
+
+ MiddleStruct(InnerStruct... items) {
+ list.clear();
+ for (InnerStruct item : items) {
+ list.add(item);
+ }
+ }
+ }
+
+ public static class BigRow {
+ Boolean boolean1;
+ Byte byte1;
+ Short short1;
+ Integer int1;
+ Long long1;
+ Float float1;
+ Double double1;
+ BytesWritable bytes1;
+ Text string1;
+ List<InnerStruct> list = new ArrayList<InnerStruct>();
+ Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
+ Timestamp ts;
+ HiveDecimal decimal1;
+ MiddleStruct middle;
+
+ BigRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
+ Double d1,
+ BytesWritable b3, String s2, MiddleStruct m1,
+ List<InnerStruct> l2, Map<Text, InnerStruct> m2, Timestamp ts1,
+ HiveDecimal dec1) {
+ this.boolean1 = b1;
+ this.byte1 = b2;
+ this.short1 = s1;
+ this.int1 = i1;
+ this.long1 = l1;
+ this.float1 = f1;
+ this.double1 = d1;
+ this.bytes1 = b3;
+ if (s2 == null) {
+ this.string1 = null;
+ } else {
+ this.string1 = new Text(s2);
+ }
+ this.middle = m1;
+ this.list = l2;
+ this.map = m2;
+ this.ts = ts1;
+ this.decimal1 = dec1;
+ }
+ }
+
+ private static InnerStruct inner(int i, String s) {
+ return new InnerStruct(i, s);
+ }
+
+ private static Map<Text, InnerStruct> map(InnerStruct... items) {
+ Map<Text, InnerStruct> result = new HashMap<Text, InnerStruct>();
+ for (InnerStruct i : items) {
+ result.put(new Text(i.string1), i);
+ }
+ return result;
+ }
+
+ private static List<InnerStruct> list(InnerStruct... items) {
+ List<InnerStruct> result = new ArrayList<InnerStruct>();
+ for (InnerStruct s : items) {
+ result.add(s);
+ }
+ return result;
+ }
+
+ private static BytesWritable bytes(int... items) {
+ BytesWritable result = new BytesWritable();
+ result.setSize(items.length);
+ for (int i = 0; i < items.length; ++i) {
+ result.getBytes()[i] = (byte) items[i];
+ }
+ return result;
+ }
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target" + File.separator + "test" + File.separator + "tmp"));
+
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+
+ @Rule
+ public TestName testCaseName = new TestName();
+
+ @Before
+ public void openFileSystem() throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ testFilePath = new Path(workDir, "TestOrcSerDeStats." +
+ testCaseName.getMethodName() + ".orc");
+ fs.delete(testFilePath, false);
+ }
+
+ @Test
+ public void testStringAndBinaryStatistics() throws Exception {
+
+ ObjectInspector inspector;
+ synchronized (TestOrcSerDeStats.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .bufferSize(10000));
+ writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
+ writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
+ writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
+ writer.addRow(new SimpleStruct(null, "hi"));
+ writer.close();
+ assertEquals(4, writer.getNumberOfRows());
+ assertEquals(273, writer.getRawDataSize());
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+ assertEquals(4, reader.getNumberOfRows());
+ assertEquals(273, reader.getRawDataSize());
+ assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
+ assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
+ assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
+
+ // check the stats
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(4, stats[0].getNumberOfValues());
+ assertEquals("count: 4", stats[0].toString());
+
+ assertEquals(3, stats[1].getNumberOfValues());
+ assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
+ assertEquals("count: 3 sum: 15", stats[1].toString());
+
+ assertEquals(3, stats[2].getNumberOfValues());
+ assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
+ assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
+ assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
+ assertEquals("count: 3 min: bar max: hi sum: 8",
+ stats[2].toString());
+
+ // check the inspectors
+ StructObjectInspector readerInspector =
+ (StructObjectInspector) reader.getObjectInspector();
+ assertEquals(ObjectInspector.Category.STRUCT,
+ readerInspector.getCategory());
+ assertEquals("struct",
+ readerInspector.getTypeName());
+ List extends StructField> fields =
+ readerInspector.getAllStructFieldRefs();
+ BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+ getStructFieldRef("bytes1").getFieldObjectInspector();
+ StringObjectInspector st = (StringObjectInspector) readerInspector.
+ getStructFieldRef("string1").getFieldObjectInspector();
+ RecordReader rows = reader.rows(null);
+ Object row = rows.next(null);
+ assertNotNull(row);
+ // check the contents of the first row
+ assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("foo", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // check the contents of second row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(bytes(0, 1, 2, 3), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("bar", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // check the contents of the third row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(bytes(0, 1, 2, 3, 4, 5), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertNull(st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // check the contents of the fourth row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertNull(bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
+
+ // handle the close up
+ assertEquals(false, rows.hasNext());
+ rows.close();
+ }
+
+
+ @Test
+ public void testOrcSerDeStatsList() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcSerDeStats.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(10000)
+ .bufferSize(10000));
+ for (int row = 0; row < 5000; row++) {
+ List<String> test = new ArrayList<String>();
+ for (int i = 0; i < 1000; i++) {
+ test.add("hi");
+ }
+ writer.addRow(new ListStruct(test));
+ }
+ writer.close();
+ assertEquals(5000, writer.getNumberOfRows());
+ assertEquals(430000000, writer.getRawDataSize());
+
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+ // stats from reader
+ assertEquals(5000, reader.getNumberOfRows());
+ assertEquals(430000000, reader.getRawDataSize());
+ assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1")));
+ }
+
+ @Test
+ public void testOrcSerDeStatsMap() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcSerDeStats.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(10000)
+ .bufferSize(10000));
+ for (int row = 0; row < 1000; row++) {
+ Map<String, Double> test = new HashMap<String, Double>();
+ for (int i = 0; i < 10; i++) {
+ test.put("hi" + i, 2.0);
+ }
+ writer.addRow(new MapStruct(test));
+ }
+ writer.close();
+ // stats from writer
+ assertEquals(1000, writer.getNumberOfRows());
+ assertEquals(950000, writer.getRawDataSize());
+
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+ // stats from reader
+ assertEquals(1000, reader.getNumberOfRows());
+ assertEquals(950000, reader.getRawDataSize());
+ assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1")));
+ }
+
+ @Test
+ public void testOrcSerDeStatsSimpleWithNulls() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcSerDeStats.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(10000)
+ .bufferSize(10000));
+ for (int row = 0; row < 1000; row++) {
+ if (row % 2 == 0) {
+ writer.addRow(new SimpleStruct(new BytesWritable(new byte[] {1, 2, 3}), "hi"));
+ } else {
+ writer.addRow(null);
+ }
+ }
+ writer.close();
+ // stats from writer
+ assertEquals(1000, writer.getNumberOfRows());
+ assertEquals(44500, writer.getRawDataSize());
+
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+ // stats from reader
+ assertEquals(1000, reader.getNumberOfRows());
+ assertEquals(44500, reader.getRawDataSize());
+ assertEquals(1500, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
+ assertEquals(43000, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
+ assertEquals(44500, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
+ }
+
+ @Test
+ public void testOrcSerDeStatsComplex() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcSerDeStats.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .bufferSize(10000));
+ // 1 + 2 + 4 + 8 + 4 + 8 + 5 + 2 + 4 + 3 + 4 + 4 + 4 + 4 + 4 + 3 = 64
+ writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
+ Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
+ new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+ list(inner(3, "good"), inner(4, "bad")),
+ map(), Timestamp.valueOf("2000-03-12 15:00:00"), new HiveDecimal(
+ "12345678.6547456")));
+ // 1 + 2 + 4 + 8 + 4 + 8 + 3 + 4 + 3 + 4 + 4 + 4 + 3 + 4 + 2 + 4 + 3 + 5 + 4 + 5 + 7 + 4 + 7 =
+ // 97
+ writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
+ Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+ new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+ list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
+ map(inner(5, "chani"), inner(1, "mauddib")), Timestamp.valueOf("2000-03-11 15:00:00"),
+ new HiveDecimal("12345678.6547452")));
+ writer.close();
+ long rowCount = writer.getNumberOfRows();
+ long rawDataSize = writer.getRawDataSize();
+ assertEquals(2, rowCount);
+ assertEquals(1740, rawDataSize);
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+
+ assertEquals(2, reader.getNumberOfRows());
+ assertEquals(1740, reader.getRawDataSize());
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("byte1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("short1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("int1")));
+ assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("long1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("float1")));
+ assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("double1")));
+ assertEquals(5, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
+ assertEquals(172, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
+ assertEquals(455, reader.getRawDataSizeOfColumns(Lists.newArrayList("list")));
+ assertEquals(368, reader.getRawDataSizeOfColumns(Lists.newArrayList("map")));
+ assertEquals(364, reader.getRawDataSizeOfColumns(Lists.newArrayList("middle")));
+ assertEquals(80, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts")));
+ assertEquals(224, reader.getRawDataSizeOfColumns(Lists.newArrayList("decimal1")));
+ assertEquals(88, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts", "int1")));
+ assertEquals(1195,
+ reader.getRawDataSizeOfColumns(Lists.newArrayList("middle", "list", "map", "float1")));
+ assertEquals(185,
+ reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "byte1", "string1")));
+ assertEquals(rawDataSize, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1",
+ "byte1", "short1", "int1", "long1", "float1", "double1", "bytes1", "string1", "list",
+ "map", "middle", "ts", "decimal1")));
+
+
+ // check the stats
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(2, stats[1].getNumberOfValues());
+ assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+ assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+ assertEquals("count: 2 true: 1", stats[1].toString());
+
+ assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+ assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+ assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+ assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
+ assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+ stats[3].toString());
+
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMaximum());
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMinimum());
+ assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+ assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+ stats[5].toString());
+
+ assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
+ assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
+ assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
+ assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+ stats[7].toString());
+
+ assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+ }
+
+ @Test
+ public void testOrcSerDeStatsComplexOldFormat() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcSerDeStats.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .version(OrcFile.Version.V_0_11)
+ .bufferSize(10000));
+ // 1 + 2 + 4 + 8 + 4 + 8 + 5 + 2 + 4 + 3 + 4 + 4 + 4 + 4 + 4 + 3 = 64
+ writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
+ Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
+ new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+ list(inner(3, "good"), inner(4, "bad")),
+ map(), Timestamp.valueOf("2000-03-12 15:00:00"), new HiveDecimal(
+ "12345678.6547456")));
+ // 1 + 2 + 4 + 8 + 4 + 8 + 3 + 4 + 3 + 4 + 4 + 4 + 3 + 4 + 2 + 4 + 3 + 5 + 4 + 5 + 7 + 4 + 7 =
+ // 97
+ writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
+ Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+ new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+ list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
+ map(inner(5, "chani"), inner(1, "mauddib")), Timestamp.valueOf("2000-03-11 15:00:00"),
+ new HiveDecimal("12345678.6547452")));
+ writer.close();
+ long rowCount = writer.getNumberOfRows();
+ long rawDataSize = writer.getRawDataSize();
+ assertEquals(2, rowCount);
+ assertEquals(1740, rawDataSize);
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+
+ assertEquals(2, reader.getNumberOfRows());
+ assertEquals(1740, reader.getRawDataSize());
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("byte1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("short1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("int1")));
+ assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("long1")));
+ assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("float1")));
+ assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("double1")));
+ assertEquals(5, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
+ assertEquals(172, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
+ assertEquals(455, reader.getRawDataSizeOfColumns(Lists.newArrayList("list")));
+ assertEquals(368, reader.getRawDataSizeOfColumns(Lists.newArrayList("map")));
+ assertEquals(364, reader.getRawDataSizeOfColumns(Lists.newArrayList("middle")));
+ assertEquals(80, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts")));
+ assertEquals(224, reader.getRawDataSizeOfColumns(Lists.newArrayList("decimal1")));
+ assertEquals(88, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts", "int1")));
+ assertEquals(1195,
+ reader.getRawDataSizeOfColumns(Lists.newArrayList("middle", "list", "map", "float1")));
+ assertEquals(185,
+ reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "byte1", "string1")));
+ assertEquals(rawDataSize, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1",
+ "byte1", "short1", "int1", "long1", "float1", "double1", "bytes1", "string1", "list",
+ "map", "middle", "ts", "decimal1")));
+
+ // check the stats
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(2, stats[1].getNumberOfValues());
+ assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+ assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+ assertEquals("count: 2 true: 1", stats[1].toString());
+
+ assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+ assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+ assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+ assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
+ assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+ stats[3].toString());
+
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMaximum());
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMinimum());
+ assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+ assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+ stats[5].toString());
+
+ assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
+ assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
+ assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
+ assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+ stats[7].toString());
+
+ assertEquals(5, ((BinaryColumnStatistics) stats[8]).getSum());
+ assertEquals("count: 2 sum: 5", stats[8].toString());
+
+ assertEquals("bye", ((StringColumnStatistics) stats[9]).getMinimum());
+ assertEquals("hi", ((StringColumnStatistics) stats[9]).getMaximum());
+ assertEquals(5, ((StringColumnStatistics) stats[9]).getSum());
+ assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+ }
+
+ @Test(expected = ClassCastException.class)
+ public void testSerdeStatsOldFormat() throws Exception {
+ Path resourceDir = new Path(System.getProperty("test.build.resources", "ql"
+ + File.separator + "src" + File.separator + "test" + File.separator
+ + "resources"));
+ Path oldFilePath = new Path(resourceDir, "orc-file-11-format.orc");
+ Reader reader = OrcFile.createReader(fs, oldFilePath);
+
+ int stripeCount = 0;
+ int rowCount = 0;
+ long currentOffset = -1;
+ for (StripeInformation stripe : reader.getStripes()) {
+ stripeCount += 1;
+ rowCount += stripe.getNumberOfRows();
+ if (currentOffset < 0) {
+ currentOffset = stripe.getOffset() + stripe.getIndexLength()
+ + stripe.getDataLength() + stripe.getFooterLength();
+ } else {
+ assertEquals(currentOffset, stripe.getOffset());
+ currentOffset += stripe.getIndexLength() + stripe.getDataLength()
+ + stripe.getFooterLength();
+ }
+ }
+ assertEquals(reader.getNumberOfRows(), rowCount);
+ assertEquals(6300000, reader.getRawDataSize());
+ assertEquals(2, stripeCount);
+
+ // check the stats
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(7500, stats[1].getNumberOfValues());
+ assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+ assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+ assertEquals("count: 7500 true: 3750", stats[1].toString());
+
+ assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+ assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+ assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+ assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
+ assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000",
+ stats[3].toString());
+
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMaximum());
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMinimum());
+ assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+ assertEquals(
+ "count: 7500 min: 9223372036854775807 max: 9223372036854775807",
+ stats[5].toString());
+
+ assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
+ assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
+ assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
+ 0.00001);
+ assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
+ stats[7].toString());
+
+ assertEquals("bye", ((StringColumnStatistics) stats[9]).getMinimum());
+ assertEquals("hi", ((StringColumnStatistics) stats[9]).getMaximum());
+ assertEquals(0, ((StringColumnStatistics) stats[9]).getSum());
+ assertEquals("count: 7500 min: bye max: hi sum: 0", stats[9].toString());
+
+ // the old ORC format does not include binary statistics, so toString()
+ // will show only the general column statistics
+ assertEquals("count: 7500", stats[8].toString());
+ // since the old ORC format does not support binary statistics,
+ // the cast below should throw the expected ClassCastException
+ assertEquals(5, ((BinaryColumnStatistics) stats[8]).getSum());
+
+ }
+
+}
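
For context on how the new statistics surface to callers, here is a minimal, illustrative sketch (not part of this patch) that uses only the reader-side API exercised by the tests above: OrcFile.createReader, Reader.getStatistics, Reader.getRawDataSize, StringColumnStatistics.getSum and BinaryColumnStatistics.getSum. The class name PrintOrcStringSums and the command-line argument handling are assumptions made for the example.

// Illustrative sketch: print per-column statistics, including the new
// string length sum, from an existing ORC file given as args[0].
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.BinaryColumnStatistics;
import org.apache.hadoop.hive.ql.io.orc.ColumnStatistics;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.StringColumnStatistics;

public class PrintOrcStringSums {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);
    Reader reader = OrcFile.createReader(fs, path);

    System.out.println("raw data size: " + reader.getRawDataSize());

    ColumnStatistics[] stats = reader.getStatistics();
    for (int i = 0; i < stats.length; ++i) {
      if (stats[i] instanceof StringColumnStatistics) {
        StringColumnStatistics s = (StringColumnStatistics) stats[i];
        // getSum() is the total length of all string values in the column;
        // files written before this change report it as 0
        System.out.println("column " + i + ": count " + s.getNumberOfValues()
            + " min " + s.getMinimum() + " max " + s.getMaximum()
            + " sum " + s.getSum());
      } else if (stats[i] instanceof BinaryColumnStatistics) {
        // guard the cast: old-format files carry no binary statistics and an
        // unconditional cast would throw (see testSerdeStatsOldFormat)
        System.out.println("column " + i + ": binary sum "
            + ((BinaryColumnStatistics) stats[i]).getSum());
      }
    }
  }
}

Guarding the casts with instanceof avoids the ClassCastException that testSerdeStatsOldFormat deliberately provokes on files written in the old format.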
diff --git ql/src/test/resources/orc-file-dump-dictionary-threshold.out ql/src/test/resources/orc-file-dump-dictionary-threshold.out
index 003c132..bac7465 100644
--- ql/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ ql/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -8,71 +8,71 @@ Statistics:
Column 0: count: 21000
Column 1: count: 21000 min: -2147390285 max: 2147453086 sum: 109128518326
Column 2: count: 21000 min: -9222731174895935707 max: 9222919052987871506
- Column 3: count: 21000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
+ Column 3: count: 21000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 6910238
Stripes:
- Stripe: offset: 3 data: 102311 rows: 4000 tail: 68 index: 217
+ Stripe: offset: 3 data: 102311 rows: 4000 tail: 68 index: 224
Stream: column 0 section ROW_INDEX start: 3 length 10
Stream: column 1 section ROW_INDEX start: 13 length 36
Stream: column 2 section ROW_INDEX start: 49 length 39
- Stream: column 3 section ROW_INDEX start: 88 length 132
- Stream: column 1 section DATA start: 220 length 16022
- Stream: column 2 section DATA start: 16242 length 32028
- Stream: column 3 section DATA start: 48270 length 50887
- Stream: column 3 section LENGTH start: 99157 length 3374
+ Stream: column 3 section ROW_INDEX start: 88 length 139
+ Stream: column 1 section DATA start: 227 length 16022
+ Stream: column 2 section DATA start: 16249 length 32028
+ Stream: column 3 section DATA start: 48277 length 50887
+ Stream: column 3 section LENGTH start: 99164 length 3374
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Stripe: offset: 102599 data: 284999 rows: 5000 tail: 68 index: 349
- Stream: column 0 section ROW_INDEX start: 102599 length 10
- Stream: column 1 section ROW_INDEX start: 102609 length 36
- Stream: column 2 section ROW_INDEX start: 102645 length 39
- Stream: column 3 section ROW_INDEX start: 102684 length 264
- Stream: column 1 section DATA start: 102948 length 20029
- Stream: column 2 section DATA start: 122977 length 40035
- Stream: column 3 section DATA start: 163012 length 219588
- Stream: column 3 section LENGTH start: 382600 length 5347
+ Stripe: offset: 102606 data: 284999 rows: 5000 tail: 68 index: 356
+ Stream: column 0 section ROW_INDEX start: 102606 length 10
+ Stream: column 1 section ROW_INDEX start: 102616 length 36
+ Stream: column 2 section ROW_INDEX start: 102652 length 39
+ Stream: column 3 section ROW_INDEX start: 102691 length 271
+ Stream: column 1 section DATA start: 102962 length 20029
+ Stream: column 2 section DATA start: 122991 length 40035
+ Stream: column 3 section DATA start: 163026 length 219588
+ Stream: column 3 section LENGTH start: 382614 length 5347
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Stripe: offset: 388015 data: 491655 rows: 5000 tail: 69 index: 536
- Stream: column 0 section ROW_INDEX start: 388015 length 10
- Stream: column 1 section ROW_INDEX start: 388025 length 36
- Stream: column 2 section ROW_INDEX start: 388061 length 39
- Stream: column 3 section ROW_INDEX start: 388100 length 451
- Stream: column 1 section DATA start: 388551 length 20029
- Stream: column 2 section DATA start: 408580 length 40035
- Stream: column 3 section DATA start: 448615 length 425862
- Stream: column 3 section LENGTH start: 874477 length 5729
+ Stripe: offset: 388029 data: 491655 rows: 5000 tail: 69 index: 544
+ Stream: column 0 section ROW_INDEX start: 388029 length 10
+ Stream: column 1 section ROW_INDEX start: 388039 length 36
+ Stream: column 2 section ROW_INDEX start: 388075 length 39
+ Stream: column 3 section ROW_INDEX start: 388114 length 459
+ Stream: column 1 section DATA start: 388573 length 20029
+ Stream: column 2 section DATA start: 408602 length 40035
+ Stream: column 3 section DATA start: 448637 length 425862
+ Stream: column 3 section LENGTH start: 874499 length 5729
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Stripe: offset: 880275 data: 707368 rows: 5000 tail: 68 index: 677
- Stream: column 0 section ROW_INDEX start: 880275 length 10
- Stream: column 1 section ROW_INDEX start: 880285 length 36
- Stream: column 2 section ROW_INDEX start: 880321 length 39
- Stream: column 3 section ROW_INDEX start: 880360 length 592
- Stream: column 1 section DATA start: 880952 length 20029
- Stream: column 2 section DATA start: 900981 length 40035
- Stream: column 3 section DATA start: 941016 length 641580
- Stream: column 3 section LENGTH start: 1582596 length 5724
+ Stripe: offset: 880297 data: 707368 rows: 5000 tail: 68 index: 691
+ Stream: column 0 section ROW_INDEX start: 880297 length 10
+ Stream: column 1 section ROW_INDEX start: 880307 length 36
+ Stream: column 2 section ROW_INDEX start: 880343 length 39
+ Stream: column 3 section ROW_INDEX start: 880382 length 606
+ Stream: column 1 section DATA start: 880988 length 20029
+ Stream: column 2 section DATA start: 901017 length 40035
+ Stream: column 3 section DATA start: 941052 length 641580
+ Stream: column 3 section LENGTH start: 1582632 length 5724
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Stripe: offset: 1588388 data: 348697 rows: 2000 tail: 67 index: 786
- Stream: column 0 section ROW_INDEX start: 1588388 length 10
- Stream: column 1 section ROW_INDEX start: 1588398 length 36
- Stream: column 2 section ROW_INDEX start: 1588434 length 39
- Stream: column 3 section ROW_INDEX start: 1588473 length 701
- Stream: column 1 section DATA start: 1589174 length 8011
- Stream: column 2 section DATA start: 1597185 length 16014
- Stream: column 3 section DATA start: 1613199 length 322259
- Stream: column 3 section LENGTH start: 1935458 length 2413
+ Stripe: offset: 1588424 data: 348697 rows: 2000 tail: 67 index: 797
+ Stream: column 0 section ROW_INDEX start: 1588424 length 10
+ Stream: column 1 section ROW_INDEX start: 1588434 length 36
+ Stream: column 2 section ROW_INDEX start: 1588470 length 39
+ Stream: column 3 section ROW_INDEX start: 1588509 length 712
+ Stream: column 1 section DATA start: 1589221 length 8011
+ Stream: column 2 section DATA start: 1597232 length 16014
+ Stream: column 3 section DATA start: 1613246 length 322259
+ Stream: column 3 section LENGTH start: 1935505 length 2413
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
- Encoding column 3: DIRECT_V2
\ No newline at end of file
+ Encoding column 3: DIRECT_V2
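
The shifts in the expected offsets above follow directly from the new sum entry: serializing it adds a few bytes to column 3's statistics in each row index, and every later stream start and stripe offset moves by the accumulated growth. For the first two stripes of this file:

    stripe 1 index grows from 217 to 224 bytes            (+7)
    stripe 2 offset moves to 102599 + 7 = 102606

Data sizes, tail sizes and row counts are unchanged.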
diff --git ql/src/test/resources/orc-file-dump.out ql/src/test/resources/orc-file-dump.out
index fac5326..5b5eb2c 100644
--- ql/src/test/resources/orc-file-dump.out
+++ ql/src/test/resources/orc-file-dump.out
@@ -8,75 +8,75 @@ Statistics:
Column 0: count: 21000
Column 1: count: 21000 min: -2146993718 max: 2147378179 sum: 193017464403
Column 2: count: 21000 min: -9222758097219661129 max: 9222303228623055266
- Column 3: count: 21000 min: Darkness, max: worst
+ Column 3: count: 21000 min: Darkness, max: worst sum: 81761
Stripes:
- Stripe: offset: 3 data: 63766 rows: 5000 tail: 74 index: 119
+ Stripe: offset: 3 data: 63766 rows: 5000 tail: 74 index: 123
Stream: column 0 section ROW_INDEX start: 3 length 10
Stream: column 1 section ROW_INDEX start: 13 length 35
Stream: column 2 section ROW_INDEX start: 48 length 39
- Stream: column 3 section ROW_INDEX start: 87 length 35
- Stream: column 1 section DATA start: 122 length 20029
- Stream: column 2 section DATA start: 20151 length 40035
- Stream: column 3 section DATA start: 60186 length 3544
- Stream: column 3 section LENGTH start: 63730 length 25
- Stream: column 3 section DICTIONARY_DATA start: 63755 length 133
+ Stream: column 3 section ROW_INDEX start: 87 length 39
+ Stream: column 1 section DATA start: 126 length 20029
+ Stream: column 2 section DATA start: 20155 length 40035
+ Stream: column 3 section DATA start: 60190 length 3544
+ Stream: column 3 section LENGTH start: 63734 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 63759 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2
- Stripe: offset: 63962 data: 63755 rows: 5000 tail: 76 index: 118
- Stream: column 0 section ROW_INDEX start: 63962 length 10
- Stream: column 1 section ROW_INDEX start: 63972 length 34
- Stream: column 2 section ROW_INDEX start: 64006 length 39
- Stream: column 3 section ROW_INDEX start: 64045 length 35
- Stream: column 1 section DATA start: 64080 length 20029
- Stream: column 2 section DATA start: 84109 length 40035
- Stream: column 3 section DATA start: 124144 length 3533
- Stream: column 3 section LENGTH start: 127677 length 25
- Stream: column 3 section DICTIONARY_DATA start: 127702 length 133
+ Stripe: offset: 63966 data: 63755 rows: 5000 tail: 74 index: 122
+ Stream: column 0 section ROW_INDEX start: 63966 length 10
+ Stream: column 1 section ROW_INDEX start: 63976 length 34
+ Stream: column 2 section ROW_INDEX start: 64010 length 39
+ Stream: column 3 section ROW_INDEX start: 64049 length 39
+ Stream: column 1 section DATA start: 64088 length 20029
+ Stream: column 2 section DATA start: 84117 length 40035
+ Stream: column 3 section DATA start: 124152 length 3533
+ Stream: column 3 section LENGTH start: 127685 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 127710 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2
- Stripe: offset: 127911 data: 63766 rows: 5000 tail: 76 index: 120
- Stream: column 0 section ROW_INDEX start: 127911 length 10
- Stream: column 1 section ROW_INDEX start: 127921 length 36
- Stream: column 2 section ROW_INDEX start: 127957 length 39
- Stream: column 3 section ROW_INDEX start: 127996 length 35
- Stream: column 1 section DATA start: 128031 length 20029
- Stream: column 2 section DATA start: 148060 length 40035
- Stream: column 3 section DATA start: 188095 length 3544
- Stream: column 3 section LENGTH start: 191639 length 25
- Stream: column 3 section DICTIONARY_DATA start: 191664 length 133
+ Stripe: offset: 127917 data: 63766 rows: 5000 tail: 74 index: 124
+ Stream: column 0 section ROW_INDEX start: 127917 length 10
+ Stream: column 1 section ROW_INDEX start: 127927 length 36
+ Stream: column 2 section ROW_INDEX start: 127963 length 39
+ Stream: column 3 section ROW_INDEX start: 128002 length 39
+ Stream: column 1 section DATA start: 128041 length 20029
+ Stream: column 2 section DATA start: 148070 length 40035
+ Stream: column 3 section DATA start: 188105 length 3544
+ Stream: column 3 section LENGTH start: 191649 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 191674 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2
- Stripe: offset: 200000 data: 63796 rows: 5000 tail: 74 index: 119
+ Stripe: offset: 200000 data: 63796 rows: 5000 tail: 74 index: 123
Stream: column 0 section ROW_INDEX start: 200000 length 10
Stream: column 1 section ROW_INDEX start: 200010 length 35
Stream: column 2 section ROW_INDEX start: 200045 length 39
- Stream: column 3 section ROW_INDEX start: 200084 length 35
- Stream: column 1 section DATA start: 200119 length 20029
- Stream: column 2 section DATA start: 220148 length 40035
- Stream: column 3 section DATA start: 260183 length 3574
- Stream: column 3 section LENGTH start: 263757 length 25
- Stream: column 3 section DICTIONARY_DATA start: 263782 length 133
+ Stream: column 3 section ROW_INDEX start: 200084 length 39
+ Stream: column 1 section DATA start: 200123 length 20029
+ Stream: column 2 section DATA start: 220152 length 40035
+ Stream: column 3 section DATA start: 260187 length 3574
+ Stream: column 3 section LENGTH start: 263761 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 263786 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2
- Stripe: offset: 263989 data: 12940 rows: 1000 tail: 71 index: 120
- Stream: column 0 section ROW_INDEX start: 263989 length 10
- Stream: column 1 section ROW_INDEX start: 263999 length 36
- Stream: column 2 section ROW_INDEX start: 264035 length 39
- Stream: column 3 section ROW_INDEX start: 264074 length 35
- Stream: column 1 section DATA start: 264109 length 4007
- Stream: column 2 section DATA start: 268116 length 8007
- Stream: column 3 section DATA start: 276123 length 768
- Stream: column 3 section LENGTH start: 276891 length 25
- Stream: column 3 section DICTIONARY_DATA start: 276916 length 133
+ Stripe: offset: 263993 data: 12940 rows: 1000 tail: 71 index: 123
+ Stream: column 0 section ROW_INDEX start: 263993 length 10
+ Stream: column 1 section ROW_INDEX start: 264003 length 36
+ Stream: column 2 section ROW_INDEX start: 264039 length 39
+ Stream: column 3 section ROW_INDEX start: 264078 length 38
+ Stream: column 1 section DATA start: 264116 length 4007
+ Stream: column 2 section DATA start: 268123 length 8007
+ Stream: column 3 section DATA start: 276130 length 768
+ Stream: column 3 section LENGTH start: 276898 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 276923 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2