diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java index 28f25e5..387fab0 100644 --- common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java +++ common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java @@ -97,6 +97,14 @@ public int compareTo(HiveDecimal dec) { return bd.compareTo(dec.bd); } + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + return bd.equals(((HiveDecimal) obj).bd); + } + public int scale() { return bd.scale(); } diff --git ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java index 5e31b13..5f08c8b 100644 --- ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java +++ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java @@ -1852,6 +1852,569 @@ public Builder clearCount() { // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.BucketStatistics) } + public interface DecimalStatisticsOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional string minimum = 1; + boolean hasMinimum(); + String getMinimum(); + + // optional string maximum = 2; + boolean hasMaximum(); + String getMaximum(); + + // optional string sum = 3; + boolean hasSum(); + String getSum(); + } + public static final class DecimalStatistics extends + com.google.protobuf.GeneratedMessage + implements DecimalStatisticsOrBuilder { + // Use DecimalStatistics.newBuilder() to construct. + private DecimalStatistics(Builder builder) { + super(builder); + } + private DecimalStatistics(boolean noInit) {} + + private static final DecimalStatistics defaultInstance; + public static DecimalStatistics getDefaultInstance() { + return defaultInstance; + } + + public DecimalStatistics getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_fieldAccessorTable; + } + + private int bitField0_; + // optional string minimum = 1; + public static final int MINIMUM_FIELD_NUMBER = 1; + private java.lang.Object minimum_; + public boolean hasMinimum() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getMinimum() { + java.lang.Object ref = minimum_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + minimum_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getMinimumBytes() { + java.lang.Object ref = minimum_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + minimum_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional string maximum = 2; + public static final int MAXIMUM_FIELD_NUMBER = 2; + private java.lang.Object maximum_; + public boolean hasMaximum() { + return ((bitField0_ & 
0x00000002) == 0x00000002); + } + public String getMaximum() { + java.lang.Object ref = maximum_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + maximum_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getMaximumBytes() { + java.lang.Object ref = maximum_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + maximum_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional string sum = 3; + public static final int SUM_FIELD_NUMBER = 3; + private java.lang.Object sum_; + public boolean hasSum() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public String getSum() { + java.lang.Object ref = sum_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + sum_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getSumBytes() { + java.lang.Object ref = sum_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + sum_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + minimum_ = ""; + maximum_ = ""; + sum_ = ""; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getMinimumBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBytes(2, getMaximumBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeBytes(3, getSumBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getMinimumBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(2, getMaximumBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(3, getSumBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom( + com.google.protobuf.ByteString data, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return 
org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_fieldAccessorTable; + } + + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + minimum_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + maximum_ = ""; + bitField0_ = (bitField0_ & ~0x00000002); + sum_ = ""; + bitField0_ = (bitField0_ & ~0x00000004); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDescriptor(); + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics getDefaultInstanceForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance(); + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics build() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics buildPartial() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.minimum_ = minimum_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.maximum_ = maximum_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.sum_ = sum_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics) { + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics other) { + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance()) return this; + if (other.hasMinimum()) { + setMinimum(other.getMinimum()); + } + if (other.hasMaximum()) { + setMaximum(other.getMaximum()); + } + if (other.hasSum()) { + setSum(other.getSum()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + minimum_ = input.readBytes(); + break; + } + case 18: { + bitField0_ |= 0x00000002; + maximum_ = input.readBytes(); + break; + } + case 26: { + bitField0_ |= 0x00000004; + sum_ = input.readBytes(); + break; + } + } + } + } + + private int bitField0_; + + // optional string minimum = 1; + private java.lang.Object minimum_ = ""; + public boolean hasMinimum() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getMinimum() { + java.lang.Object ref = minimum_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + minimum_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setMinimum(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + minimum_ = value; + onChanged(); + return this; + } + public Builder clearMinimum() { + bitField0_ = (bitField0_ & ~0x00000001); + minimum_ = getDefaultInstance().getMinimum(); + onChanged(); + return this; + } + void setMinimum(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000001; + minimum_ = value; + onChanged(); + } + + // optional string maximum = 2; + private java.lang.Object maximum_ = ""; + public boolean hasMaximum() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getMaximum() { + java.lang.Object ref = maximum_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + maximum_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setMaximum(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + maximum_ = value; + onChanged(); + return this; + } + public Builder clearMaximum() { + bitField0_ = (bitField0_ & ~0x00000002); + maximum_ = getDefaultInstance().getMaximum(); + onChanged(); + return this; + } + void setMaximum(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000002; + maximum_ = value; + onChanged(); + } + + // optional string sum = 3; + private java.lang.Object sum_ = ""; + public boolean hasSum() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public String getSum() { + java.lang.Object ref = sum_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + sum_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setSum(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000004; + sum_ = value; + onChanged(); + return this; + } + public Builder clearSum() { + bitField0_ = (bitField0_ & ~0x00000004); + sum_ = getDefaultInstance().getSum(); + onChanged(); + return this; + } + void setSum(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000004; + sum_ = value; + onChanged(); + } + + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.DecimalStatistics) + } + + static { 
+ defaultInstance = new DecimalStatistics(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.DecimalStatistics) + } + public interface ColumnStatisticsOrBuilder extends com.google.protobuf.MessageOrBuilder { @@ -1878,6 +2441,11 @@ public Builder clearCount() { boolean hasBucketStatistics(); org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics getBucketStatistics(); org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatisticsOrBuilder getBucketStatisticsOrBuilder(); + + // optional .org.apache.hadoop.hive.ql.io.orc.DecimalStatistics decimalStatistics = 6; + boolean hasDecimalStatistics(); + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics getDecimalStatistics(); + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder getDecimalStatisticsOrBuilder(); } public static final class ColumnStatistics extends com.google.protobuf.GeneratedMessage @@ -1970,12 +2538,26 @@ public boolean hasBucketStatistics() { return bucketStatistics_; } + // optional .org.apache.hadoop.hive.ql.io.orc.DecimalStatistics decimalStatistics = 6; + public static final int DECIMALSTATISTICS_FIELD_NUMBER = 6; + private org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics decimalStatistics_; + public boolean hasDecimalStatistics() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics getDecimalStatistics() { + return decimalStatistics_; + } + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder getDecimalStatisticsOrBuilder() { + return decimalStatistics_; + } + private void initFields() { numberOfValues_ = 0L; intStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance(); doubleStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.getDefaultInstance(); stringStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.getDefaultInstance(); bucketStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDefaultInstance(); + decimalStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance(); } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -2004,6 +2586,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (((bitField0_ & 0x00000010) == 0x00000010)) { output.writeMessage(5, bucketStatistics_); } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + output.writeMessage(6, decimalStatistics_); + } getUnknownFields().writeTo(output); } @@ -2033,6 +2618,10 @@ public int getSerializedSize() { size += com.google.protobuf.CodedOutputStream .computeMessageSize(5, bucketStatistics_); } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(6, decimalStatistics_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -2153,6 +2742,7 @@ private void maybeForceBuilderInitialization() { getDoubleStatisticsFieldBuilder(); getStringStatisticsFieldBuilder(); getBucketStatisticsFieldBuilder(); + getDecimalStatisticsFieldBuilder(); } } private static Builder create() { @@ -2187,6 +2777,12 @@ public Builder clear() { bucketStatisticsBuilder_.clear(); } bitField0_ = (bitField0_ & ~0x00000010); + if (decimalStatisticsBuilder_ == null) { + decimalStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance(); + } else { + 
decimalStatisticsBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000020); return this; } @@ -2261,6 +2857,14 @@ public Builder clone() { } else { result.bucketStatistics_ = bucketStatisticsBuilder_.build(); } + if (((from_bitField0_ & 0x00000020) == 0x00000020)) { + to_bitField0_ |= 0x00000020; + } + if (decimalStatisticsBuilder_ == null) { + result.decimalStatistics_ = decimalStatistics_; + } else { + result.decimalStatistics_ = decimalStatisticsBuilder_.build(); + } result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -2292,6 +2896,9 @@ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatist if (other.hasBucketStatistics()) { mergeBucketStatistics(other.getBucketStatistics()); } + if (other.hasDecimalStatistics()) { + mergeDecimalStatistics(other.getDecimalStatistics()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -2364,6 +2971,15 @@ public Builder mergeFrom( setBucketStatistics(subBuilder.buildPartial()); break; } + case 50: { + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder subBuilder = org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.newBuilder(); + if (hasDecimalStatistics()) { + subBuilder.mergeFrom(getDecimalStatistics()); + } + input.readMessage(subBuilder, extensionRegistry); + setDecimalStatistics(subBuilder.buildPartial()); + break; + } } } } @@ -2751,6 +3367,96 @@ public Builder clearBucketStatistics() { return bucketStatisticsBuilder_; } + // optional .org.apache.hadoop.hive.ql.io.orc.DecimalStatistics decimalStatistics = 6; + private org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics decimalStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics, org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder> decimalStatisticsBuilder_; + public boolean hasDecimalStatistics() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics getDecimalStatistics() { + if (decimalStatisticsBuilder_ == null) { + return decimalStatistics_; + } else { + return decimalStatisticsBuilder_.getMessage(); + } + } + public Builder setDecimalStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics value) { + if (decimalStatisticsBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + decimalStatistics_ = value; + onChanged(); + } else { + decimalStatisticsBuilder_.setMessage(value); + } + bitField0_ |= 0x00000020; + return this; + } + public Builder setDecimalStatistics( + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder builderForValue) { + if (decimalStatisticsBuilder_ == null) { + decimalStatistics_ = builderForValue.build(); + onChanged(); + } else { + decimalStatisticsBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x00000020; + return this; + } + public Builder mergeDecimalStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics value) { + if (decimalStatisticsBuilder_ == null) { + if (((bitField0_ & 0x00000020) == 0x00000020) && + decimalStatistics_ != org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance()) { + decimalStatistics_ = + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.newBuilder(decimalStatistics_).mergeFrom(value).buildPartial(); + } else { + 
decimalStatistics_ = value; + } + onChanged(); + } else { + decimalStatisticsBuilder_.mergeFrom(value); + } + bitField0_ |= 0x00000020; + return this; + } + public Builder clearDecimalStatistics() { + if (decimalStatisticsBuilder_ == null) { + decimalStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.getDefaultInstance(); + onChanged(); + } else { + decimalStatisticsBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000020); + return this; + } + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder getDecimalStatisticsBuilder() { + bitField0_ |= 0x00000020; + onChanged(); + return getDecimalStatisticsFieldBuilder().getBuilder(); + } + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder getDecimalStatisticsOrBuilder() { + if (decimalStatisticsBuilder_ != null) { + return decimalStatisticsBuilder_.getMessageOrBuilder(); + } else { + return decimalStatistics_; + } + } + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics, org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder> + getDecimalStatisticsFieldBuilder() { + if (decimalStatisticsBuilder_ == null) { + decimalStatisticsBuilder_ = new com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics, org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatisticsOrBuilder>( + decimalStatistics_, + getParentForChildren(), + isClean()); + decimalStatistics_ = null; + } + return decimalStatisticsBuilder_; + } + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.ColumnStatistics) } @@ -3905,7 +4611,7 @@ public Stream getDefaultInstanceForType() { LENGTH(2, 2), DICTIONARY_DATA(3, 3), DICTIONARY_COUNT(4, 4), - NANO_DATA(5, 5), + SECONDARY(5, 5), ROW_INDEX(6, 6), ; @@ -3914,7 +4620,7 @@ public Stream getDefaultInstanceForType() { public static final int LENGTH_VALUE = 2; public static final int DICTIONARY_DATA_VALUE = 3; public static final int DICTIONARY_COUNT_VALUE = 4; - public static final int NANO_DATA_VALUE = 5; + public static final int SECONDARY_VALUE = 5; public static final int ROW_INDEX_VALUE = 6; @@ -3927,7 +4633,7 @@ public static Kind valueOf(int value) { case 2: return LENGTH; case 3: return DICTIONARY_DATA; case 4: return DICTIONARY_COUNT; - case 5: return NANO_DATA; + case 5: return SECONDARY; case 6: return ROW_INDEX; default: return null; } @@ -3959,7 +4665,7 @@ public Kind findValueByNumber(int number) { } private static final Kind[] VALUES = { - PRESENT, DATA, LENGTH, DICTIONARY_DATA, DICTIONARY_COUNT, NANO_DATA, ROW_INDEX, + PRESENT, DATA, LENGTH, DICTIONARY_DATA, DICTIONARY_COUNT, SECONDARY, ROW_INDEX, }; public static Kind valueOf( @@ -5798,6 +6504,7 @@ public Type getDefaultInstanceForType() { MAP(11, 11), STRUCT(12, 12), UNION(13, 13), + DECIMAL(14, 14), ; public static final int BOOLEAN_VALUE = 0; @@ -5814,6 +6521,7 @@ public Type getDefaultInstanceForType() { public static final int MAP_VALUE = 11; public static final int STRUCT_VALUE = 12; public static final int UNION_VALUE = 13; + public static final int DECIMAL_VALUE = 14; public final int getNumber() { return value; } @@ -5834,6 +6542,7 @@ public static Kind valueOf(int value) { case 11: return MAP; case 12: return STRUCT; case 13: return UNION; + case 14: return DECIMAL; default: return null; } } @@ -5864,7 +6573,7 @@ public 
Kind findValueByNumber(int number) { } private static final Kind[] VALUES = { - BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP, LIST, MAP, STRUCT, UNION, + BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP, LIST, MAP, STRUCT, UNION, DECIMAL, }; public static Kind valueOf( @@ -9550,6 +10259,11 @@ public Builder clearCompressionBlockSize() { com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_fieldAccessorTable; private static com.google.protobuf.Descriptors.Descriptor + internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor; private static com.google.protobuf.GeneratedMessage.FieldAccessorTable @@ -9620,58 +10334,62 @@ public Builder clearCompressionBlockSize() { "maximum\030\002 \001(\001\022\013\n\003sum\030\003 \001(\001\"4\n\020StringStat" + "istics\022\017\n\007minimum\030\001 \001(\t\022\017\n\007maximum\030\002 \001(\t" + "\"%\n\020BucketStatistics\022\021\n\005count\030\001 \003(\004B\002\020\001\"" + - "\340\002\n\020ColumnStatistics\022\026\n\016numberOfValues\030\001" + - " \001(\004\022J\n\rintStatistics\030\002 \001(\01323.org.apache" + - ".hadoop.hive.ql.io.orc.IntegerStatistics", - "\022L\n\020doubleStatistics\030\003 \001(\01322.org.apache." + - "hadoop.hive.ql.io.orc.DoubleStatistics\022L" + - "\n\020stringStatistics\030\004 \001(\01322.org.apache.ha" + - "doop.hive.ql.io.orc.StringStatistics\022L\n\020" + - "bucketStatistics\030\005 \001(\01322.org.apache.hado" + - "op.hive.ql.io.orc.BucketStatistics\"n\n\rRo" + - "wIndexEntry\022\025\n\tpositions\030\001 \003(\004B\002\020\001\022F\n\nst" + - "atistics\030\002 \001(\01322.org.apache.hadoop.hive." 
+ - "ql.io.orc.ColumnStatistics\"J\n\010RowIndex\022>" + - "\n\005entry\030\001 \003(\0132/.org.apache.hadoop.hive.q", - "l.io.orc.RowIndexEntry\"\331\001\n\006Stream\022;\n\004kin" + - "d\030\001 \002(\0162-.org.apache.hadoop.hive.ql.io.o" + - "rc.Stream.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006length" + - "\030\003 \001(\004\"r\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n" + - "\006LENGTH\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DICTIO" + - "NARY_COUNT\020\004\022\r\n\tNANO_DATA\020\005\022\r\n\tROW_INDEX" + - "\020\006\"\221\001\n\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.or" + - "g.apache.hadoop.hive.ql.io.orc.ColumnEnc" + - "oding.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"\"\n\004Ki" + - "nd\022\n\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\"\214\001\n\014Strip", - "eFooter\0229\n\007streams\030\001 \003(\0132(.org.apache.ha" + - "doop.hive.ql.io.orc.Stream\022A\n\007columns\030\002 " + - "\003(\01320.org.apache.hadoop.hive.ql.io.orc.C" + - "olumnEncoding\"\221\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.o" + - "rg.apache.hadoop.hive.ql.io.orc.Type.Kin" + - "d\022\024\n\010subtypes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 " + - "\003(\t\"\243\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005S" + - "HORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006" + - "DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIME" + - "STAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t", - "\n\005UNION\020\r\"x\n\021StripeInformation\022\016\n\006offset" + - "\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndataLengt" + - "h\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014numberO" + - "fRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004name\030" + - "\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024\n\014heade" + - "rLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007" + - "stripes\030\003 \003(\01323.org.apache.hadoop.hive.q" + - "l.io.orc.StripeInformation\0225\n\005types\030\004 \003(" + - "\0132&.org.apache.hadoop.hive.ql.io.orc.Typ" + - "e\022D\n\010metadata\030\005 \003(\01322.org.apache.hadoop.", - "hive.ql.io.orc.UserMetadataItem\022\024\n\014numbe" + - "rOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org." + - "apache.hadoop.hive.ql.io.orc.ColumnStati" + - "stics\022\026\n\016rowIndexStride\030\010 \001(\r\"\210\001\n\nPostSc" + - "ript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compressio" + - "n\030\002 \001(\01621.org.apache.hadoop.hive.ql.io.o" + - "rc.CompressionKind\022\034\n\024compressionBlockSi" + - "ze\030\003 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n" + - "\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" + "B\n\021DecimalStatistics\022\017\n\007minimum\030\001 \001(\t\022\017\n" + + "\007maximum\030\002 \001(\t\022\013\n\003sum\030\003 \001(\t\"\260\003\n\020ColumnSt" + + "atistics\022\026\n\016numberOfValues\030\001 \001(\004\022J\n\rintS", + "tatistics\030\002 \001(\01323.org.apache.hadoop.hive" + + ".ql.io.orc.IntegerStatistics\022L\n\020doubleSt" + + "atistics\030\003 \001(\01322.org.apache.hadoop.hive." 
+ + "ql.io.orc.DoubleStatistics\022L\n\020stringStat" + + "istics\030\004 \001(\01322.org.apache.hadoop.hive.ql" + + ".io.orc.StringStatistics\022L\n\020bucketStatis" + + "tics\030\005 \001(\01322.org.apache.hadoop.hive.ql.i" + + "o.orc.BucketStatistics\022N\n\021decimalStatist" + + "ics\030\006 \001(\01323.org.apache.hadoop.hive.ql.io" + + ".orc.DecimalStatistics\"n\n\rRowIndexEntry\022", + "\025\n\tpositions\030\001 \003(\004B\002\020\001\022F\n\nstatistics\030\002 \001" + + "(\01322.org.apache.hadoop.hive.ql.io.orc.Co" + + "lumnStatistics\"J\n\010RowIndex\022>\n\005entry\030\001 \003(" + + "\0132/.org.apache.hadoop.hive.ql.io.orc.Row" + + "IndexEntry\"\331\001\n\006Stream\022;\n\004kind\030\001 \002(\0162-.or" + + "g.apache.hadoop.hive.ql.io.orc.Stream.Ki" + + "nd\022\016\n\006column\030\002 \001(\r\022\016\n\006length\030\003 \001(\004\"r\n\004Ki" + + "nd\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LENGTH\020\002\022\023\n" + + "\017DICTIONARY_DATA\020\003\022\024\n\020DICTIONARY_COUNT\020\004" + + "\022\r\n\tSECONDARY\020\005\022\r\n\tROW_INDEX\020\006\"\221\001\n\016Colum", + "nEncoding\022C\n\004kind\030\001 \002(\01625.org.apache.had" + + "oop.hive.ql.io.orc.ColumnEncoding.Kind\022\026" + + "\n\016dictionarySize\030\002 \001(\r\"\"\n\004Kind\022\n\n\006DIRECT" + + "\020\000\022\016\n\nDICTIONARY\020\001\"\214\001\n\014StripeFooter\0229\n\007s" + + "treams\030\001 \003(\0132(.org.apache.hadoop.hive.ql" + + ".io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org.ap" + + "ache.hadoop.hive.ql.io.orc.ColumnEncodin" + + "g\"\236\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache.ha" + + "doop.hive.ql.io.orc.Type.Kind\022\024\n\010subtype" + + "s\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\"\260\001\n\004Kind", + "\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003IN" + + "T\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006" + + "STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004L" + + "IST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n" + + "\007DECIMAL\020\016\"x\n\021StripeInformation\022\016\n\006offse" + + "t\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndataLeng" + + "th\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014number" + + "OfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004name" + + "\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024\n\014head" + + "erLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n", + "\007stripes\030\003 \003(\01323.org.apache.hadoop.hive." 
+ + "ql.io.orc.StripeInformation\0225\n\005types\030\004 \003" + + "(\0132&.org.apache.hadoop.hive.ql.io.orc.Ty" + + "pe\022D\n\010metadata\030\005 \003(\01322.org.apache.hadoop" + + ".hive.ql.io.orc.UserMetadataItem\022\024\n\014numb" + + "erOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org" + + ".apache.hadoop.hive.ql.io.orc.ColumnStat" + + "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\210\001\n\nPostS" + + "cript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compressi" + + "on\030\002 \001(\01621.org.apache.hadoop.hive.ql.io.", + "orc.CompressionKind\022\034\n\024compressionBlockS" + + "ize\030\003 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" + + "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -9710,16 +10428,24 @@ public Builder clearCompressionBlockSize() { new java.lang.String[] { "Count", }, org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.Builder.class); - internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor = + internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_descriptor = getDescriptor().getMessageTypes().get(4); + internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_org_apache_hadoop_hive_ql_io_orc_DecimalStatistics_descriptor, + new java.lang.String[] { "Minimum", "Maximum", "Sum", }, + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.class, + org.apache.hadoop.hive.ql.io.orc.OrcProto.DecimalStatistics.Builder.class); + internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor = + getDescriptor().getMessageTypes().get(5); internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor, - new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", }, + new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", }, org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_descriptor = - getDescriptor().getMessageTypes().get(5); + getDescriptor().getMessageTypes().get(6); internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_descriptor, @@ -9727,7 +10453,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_descriptor = - getDescriptor().getMessageTypes().get(6); + getDescriptor().getMessageTypes().get(7); internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( 
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndex_descriptor, @@ -9735,7 +10461,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor = - getDescriptor().getMessageTypes().get(7); + getDescriptor().getMessageTypes().get(8); internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Stream_descriptor, @@ -9743,7 +10469,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_descriptor = - getDescriptor().getMessageTypes().get(8); + getDescriptor().getMessageTypes().get(9); internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnEncoding_descriptor, @@ -9751,7 +10477,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncoding.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncoding.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_descriptor = - getDescriptor().getMessageTypes().get(9); + getDescriptor().getMessageTypes().get(10); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_StripeFooter_descriptor, @@ -9759,7 +10485,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor = - getDescriptor().getMessageTypes().get(10); + getDescriptor().getMessageTypes().get(11); internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor, @@ -9767,7 +10493,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor = - getDescriptor().getMessageTypes().get(11); + getDescriptor().getMessageTypes().get(12); internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor, @@ -9775,7 +10501,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeInformation.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeInformation.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_descriptor = - getDescriptor().getMessageTypes().get(12); + getDescriptor().getMessageTypes().get(13); internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_UserMetadataItem_descriptor, @@ -9783,7 
+10509,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_descriptor = - getDescriptor().getMessageTypes().get(13); + getDescriptor().getMessageTypes().get(14); internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_Footer_descriptor, @@ -9791,7 +10517,7 @@ public Builder clearCompressionBlockSize() { org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder.class); internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor = - getDescriptor().getMessageTypes().get(14); + getDescriptor().getMessageTypes().get(15); internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor, diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java index bee568e..f91b175 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -420,6 +421,128 @@ public String toString() { } } + private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl + implements DecimalColumnStatistics { + private HiveDecimal minimum = null; + private HiveDecimal maximum = null; + private HiveDecimal sum = HiveDecimal.ZERO; + + DecimalStatisticsImpl() { + } + + DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) { + super(stats); + OrcProto.DecimalStatistics dec = stats.getDecimalStatistics(); + if (dec.hasMaximum()) { + maximum = new HiveDecimal(dec.getMaximum()); + } + if (dec.hasMinimum()) { + minimum = new HiveDecimal(dec.getMinimum()); + } + if (dec.hasSum()) { + sum = new HiveDecimal(dec.getSum()); + } else { + sum = null; + } + } + + @Override + void reset() { + super.reset(); + minimum = null; + maximum = null; + sum = HiveDecimal.ZERO; + } + + @Override + void updateDecimal(HiveDecimal value) { + if (minimum == null) { + minimum = value; + maximum = value; + } else if (minimum.compareTo(value) > 0) { + minimum = value; + } else if (maximum.compareTo(value) < 0) { + maximum = value; + } + if (sum != null) { + try { + sum = sum.add(value); + } catch (NumberFormatException nfe) { + sum = null; + } + } + } + + @Override + void merge(ColumnStatisticsImpl other) { + super.merge(other); + DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other; + if (minimum == null) { + minimum = dec.minimum; + maximum = dec.maximum; + sum = dec.sum; + } else if (dec.minimum != null) { + if (minimum.compareTo(dec.minimum) > 0) { + minimum = dec.minimum; + } else if (maximum.compareTo(dec.maximum) < 0) { + maximum = dec.maximum; + } + if (sum == null || dec.sum == null) { + sum = null; + } else { + sum = sum.add(dec.sum); + } + } + } + + @Override + OrcProto.ColumnStatistics.Builder serialize() { + OrcProto.ColumnStatistics.Builder 
result = super.serialize(); + OrcProto.DecimalStatistics.Builder dec = + OrcProto.DecimalStatistics.newBuilder(); + if (getNumberOfValues() != 0) { + dec.setMinimum(minimum.toString()); + dec.setMaximum(maximum.toString()); + } + if (sum != null) { + dec.setSum(sum.toString()); + } + result.setDecimalStatistics(dec); + return result; + } + + @Override + public HiveDecimal getMinimum() { + return minimum; + } + + @Override + public HiveDecimal getMaximum() { + return maximum; + } + + @Override + public HiveDecimal getSum() { + return sum; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(super.toString()); + if (getNumberOfValues() != 0) { + buf.append(" min: "); + buf.append(minimum); + buf.append(" max: "); + buf.append(maximum); + if (sum != null) { + buf.append(" sum: "); + buf.append(sum); + } + } + return buf.toString(); + } + } + private long count = 0; ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) { @@ -451,6 +574,10 @@ void updateString(String value) { throw new UnsupportedOperationException("Can't update string"); } + void updateDecimal(HiveDecimal value) { + throw new UnsupportedOperationException("Can't update decimal"); + } + void merge(ColumnStatisticsImpl stats) { count += stats.count; } @@ -492,6 +619,8 @@ static ColumnStatisticsImpl create(ObjectInspector inspector) { return new DoubleStatisticsImpl(); case STRING: return new StringStatisticsImpl(); + case DECIMAL: + return new DecimalStatisticsImpl(); default: return new ColumnStatisticsImpl(); } @@ -509,6 +638,8 @@ static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) { return new DoubleStatisticsImpl(stats); } else if (stats.hasStringStatistics()) { return new StringStatisticsImpl(stats); + } else if (stats.hasDecimalStatistics()) { + return new DecimalStatisticsImpl(stats); } else { return new ColumnStatisticsImpl(stats); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java new file mode 100644 index 0000000..ec6aa43 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import org.apache.hadoop.hive.common.type.HiveDecimal; + +/** + * Statistics for decimal columns. + */ +public interface DecimalColumnStatistics extends ColumnStatistics { + + /** + * Get the minimum value for the column. + * @return the minimum value + */ + HiveDecimal getMinimum(); + + /** + * Get the maximum value for the column. + * @return the maximum value + */ + HiveDecimal getMaximum(); + + /** + * Get the sum of the values of the column. 
+ * @return the sum + */ + HiveDecimal getSum(); + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java index c364799..e54a881 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java @@ -337,6 +337,8 @@ static ObjectInspector createObjectInspector(TypeInfo info) { return PrimitiveObjectInspectorFactory.writableStringObjectInspector; case TIMESTAMP: return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector; + case DECIMAL: + return PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector; default: throw new IllegalArgumentException("Unknown primitive type " + ((PrimitiveTypeInfo) info).getPrimitiveCategory()); @@ -379,6 +381,8 @@ static ObjectInspector createObjectInspector(int columnId, return PrimitiveObjectInspectorFactory.writableStringObjectInspector; case TIMESTAMP: return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector; + case DECIMAL: + return PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector; case STRUCT: return new OrcStructInspector(columnId, types); case UNION: diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index d5ad8f8..d044cd8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -556,7 +557,7 @@ void startStripe(Map streams, data = new RunLengthIntegerReader(streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA)), true); nanos = new RunLengthIntegerReader(streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.NANO_DATA)), false); + OrcProto.Stream.Kind.SECONDARY)), false); } @Override @@ -610,6 +611,52 @@ void skipRows(long items) throws IOException { } } + private static class DecimalTreeReader extends TreeReader{ + private InStream valueStream; + private RunLengthIntegerReader scaleStream; + + DecimalTreeReader(int columnId) { + super(columnId); + } + + @Override + void startStripe(Map streams, + List encodings + ) throws IOException { + super.startStripe(streams, encodings); + valueStream = streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.DATA)); + scaleStream = new RunLengthIntegerReader(streams.get( + new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + super.seek(index); + valueStream.seek(index[columnId]); + scaleStream.seek(index[columnId]); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + if (valuePresent) { + return new HiveDecimal(SerializationUtils.readBigInteger(valueStream), + (int) scaleStream.next()); + } + return null; + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + for(int i=0; i < items; i++) { + SerializationUtils.readBigInteger(valueStream); + } + scaleStream.skip(items); + } + } + private static class StringTreeReader extends TreeReader { private DynamicByteArray dictionaryBuffer = null; private 
int dictionarySize; @@ -1024,6 +1071,8 @@ private static TreeReader createTreeReader(int columnId, return new BinaryTreeReader(columnId); case TIMESTAMP: return new TimestampTreeReader(columnId); + case DECIMAL: + return new DecimalTreeReader(columnId); case STRUCT: return new StructTreeReader(columnId, types, included); case LIST: diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java index cfe8262..67762b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.math.BigInteger; final class SerializationUtils { @@ -103,4 +104,85 @@ static void writeDouble(OutputStream output, output.write(((int) (ser >> 48)) & 0xff); output.write(((int) (ser >> 56)) & 0xff); } + + /** + * Write the arbitrarily sized signed BigInteger in vint format. + * + * Signed integers are encoded using the low bit as the sign bit using zigzag + * encoding. + * + * Each byte uses the low 7 bits for data and the high bit for stop/continue. + * + * Bytes are stored LSB first. + * @param output the stream to write to + * @param value the value to output + * @throws IOException + */ + static void writeBigInteger(OutputStream output, + BigInteger value) throws IOException { + // encode the signed number as a positive integer + value = value.shiftLeft(1); + int sign = value.signum(); + if (sign < 0) { + value = value.negate(); + value = value.subtract(BigInteger.ONE); + } + int length = value.bitLength(); + while (true) { + long lowBits = value.longValue() & 0x7fffffffffffffffL; + length -= 63; + // write out the next 63 bits worth of data + for(int i=0; i < 9; ++i) { + // if this is the last byte, leave the high bit off + if (length <= 0 && (lowBits & ~0x7f) == 0) { + output.write((byte) lowBits); + return; + } else { + output.write((byte) (0x80 | (lowBits & 0x7f))); + lowBits >>>= 7; + } + } + value = value.shiftRight(63); + } + } + + /** + * Read the signed arbitrary sized BigInteger BigInteger in vint format + * @param input the stream to read from + * @return the read BigInteger + * @throws IOException + */ + static BigInteger readBigInteger(InputStream input) throws IOException { + BigInteger result = BigInteger.ZERO; + long work = 0; + int offset = 0; + long b; + do { + b = input.read(); + if (b == -1) { + throw new EOFException("Reading BigInteger past EOF from " + input); + } + work |= (0x7f & b) << (offset % 63); + offset += 7; + // if we've read 63 bits, roll them into the result + if (offset == 63) { + result = BigInteger.valueOf(work); + work = 0; + } else if (offset % 63 == 0) { + result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63)); + work = 0; + } + } while (b >= 0x80); + if (work != 0) { + result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63)); + } + // convert back to a signed number + boolean isNegative = result.testBit(0); + if (isNegative) { + result = result.add(BigInteger.ONE); + result = result.negate(); + } + result = result.shiftRight(1); + return result; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 5b7245a..1679d09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -23,6 +23,7 @@ 
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -30,6 +31,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -306,7 +308,7 @@ public boolean buildIndex() { private final PositionedOutputStream rowIndexStream; /** - * Create a tree writer + * Create a tree writer. * @param columnId the column id of the column to write * @param inspector the object inspector to use * @param streamFactory limited access to the Writer's data. @@ -867,7 +869,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { this.seconds = new RunLengthIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.DATA), true); this.nanos = new RunLengthIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.NANO_DATA), false); + OrcProto.Stream.Kind.SECONDARY), false); recordPosition(rowIndexPosition); } @@ -916,6 +918,51 @@ void recordPosition(PositionRecorder recorder) throws IOException { } } + private static class DecimalTreeWriter extends TreeWriter { + private final PositionedOutputStream valueStream; + private final RunLengthIntegerWriter scaleStream; + + DecimalTreeWriter(int columnId, + ObjectInspector inspector, + StreamFactory writer, + boolean nullable) throws IOException { + super(columnId, inspector, writer, nullable); + valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA); + scaleStream = new RunLengthIntegerWriter(writer.createStream(id, + OrcProto.Stream.Kind.SECONDARY), true); + recordPosition(rowIndexPosition); + } + + @Override + void write(Object obj) throws IOException { + super.write(obj); + if (obj != null) { + HiveDecimal decimal = ((HiveDecimalObjectInspector) inspector). 
+ getPrimitiveJavaObject(obj); + SerializationUtils.writeBigInteger(valueStream, + decimal.unscaledValue()); + scaleStream.write(decimal.scale()); + indexStatistics.updateDecimal(decimal); + } + } + + @Override + void writeStripe(OrcProto.StripeFooter.Builder builder, + int requiredIndexEntries) throws IOException { + super.writeStripe(builder, requiredIndexEntries); + valueStream.flush(); + scaleStream.flush(); + recordPosition(rowIndexPosition); + } + + @Override + void recordPosition(PositionRecorder recorder) throws IOException { + super.recordPosition(recorder); + valueStream.getPosition(recorder); + scaleStream.getPosition(recorder); + } + } + private static class StructTreeWriter extends TreeWriter { private final List fields; StructTreeWriter(int columnId, @@ -1145,6 +1192,9 @@ private static TreeWriter createTreeWriter(ObjectInspector inspector, case TIMESTAMP: return new TimestampTreeWriter(streamFactory.getNextColumnId(), inspector, streamFactory, nullable); + case DECIMAL: + return new DecimalTreeWriter(streamFactory.getNextColumnId(), + inspector, streamFactory, nullable); default: throw new IllegalArgumentException("Bad primitive category " + ((PrimitiveObjectInspector) inspector).getPrimitiveCategory()); @@ -1204,6 +1254,9 @@ private static void writeTypes(OrcProto.Footer.Builder builder, case TIMESTAMP: type.setKind(OrcProto.Type.Kind.TIMESTAMP); break; + case DECIMAL: + type.setKind(OrcProto.Type.Kind.DECIMAL); + break; default: throw new IllegalArgumentException("Unknown primitive category: " + ((PrimitiveObjectInspector) treeWriter.inspector). diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto index 57f4ea9..048c117 100644 --- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto +++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto @@ -21,12 +21,19 @@ message BucketStatistics { repeated uint64 count = 1 [packed=true]; } +message DecimalStatistics { + optional string minimum = 1; + optional string maximum = 2; + optional string sum = 3; +} + message ColumnStatistics { optional uint64 numberOfValues = 1; optional IntegerStatistics intStatistics = 2; optional DoubleStatistics doubleStatistics = 3; optional StringStatistics stringStatistics = 4; optional BucketStatistics bucketStatistics = 5; + optional DecimalStatistics decimalStatistics = 6; } message RowIndexEntry { @@ -47,7 +54,7 @@ message Stream { LENGTH = 2; DICTIONARY_DATA = 3; DICTIONARY_COUNT = 4; - NANO_DATA = 5; + SECONDARY = 5; ROW_INDEX = 6; } required Kind kind = 1; @@ -85,6 +92,7 @@ message Type { MAP = 11; STRUCT = 12; UNION = 13; + DECIMAL = 14; } required Kind kind = 1; repeated uint32 subtypes = 2 [packed=true]; diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 0ddc9e7..292302b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -51,6 +52,7 @@ import org.junit.rules.TestName; import java.io.File; +import java.math.BigInteger; import java.nio.ByteBuffer; import 
java.sql.Timestamp; import java.util.ArrayList; @@ -562,7 +564,7 @@ public void metaData() throws Exception { } /** - * We test union and timestamp separately since we need to make the + * We test union, timestamp, and decimal separately since we need to make the * object inspector manually. (The Hive reflection-based doesn't handle * them properly.) */ @@ -570,8 +572,8 @@ public void metaData() throws Exception { public void testUnionAndTimestamp() throws Exception { List types = new ArrayList(); types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT). - addFieldNames("time").addFieldNames("union"). - addSubtypes(1).addSubtypes(2).build()); + addFieldNames("time").addFieldNames("union").addFieldNames("decimal"). + addSubtypes(1).addSubtypes(2).addSubtypes(5).build()); types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP). build()); types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION). @@ -580,24 +582,32 @@ public void testUnionAndTimestamp() throws Exception { build()); types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING). build()); + types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DECIMAL). + build()); ObjectInspector inspector; synchronized (TestOrcFile.class) { inspector = OrcStruct.createObjectInspector(0, types); } + HiveDecimal maxValue = new HiveDecimal("100000000000000000000"); Writer writer = OrcFile.createWriter(fs, testFilePath, inspector, 1000, CompressionKind.NONE, 100, 10000); - OrcStruct row = new OrcStruct(2); + OrcStruct row = new OrcStruct(3); OrcUnion union = new OrcUnion(); row.setFieldValue(1, union); row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00")); + HiveDecimal value = new HiveDecimal("12345678.6547456"); + row.setFieldValue(2, value); union.set((byte) 0, new IntWritable(42)); writer.addRow(row); row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789")); - union.set((byte)1, new Text("hello")); + union.set((byte) 1, new Text("hello")); + value = new HiveDecimal("-5643.234"); + row.setFieldValue(2, value); writer.addRow(row); row.setFieldValue(0, null); row.setFieldValue(1, null); + row.setFieldValue(2, null); writer.addRow(row); row.setFieldValue(1, union); union.set((byte) 0, null); @@ -606,7 +616,10 @@ public void testUnionAndTimestamp() throws Exception { writer.addRow(row); union.set((byte) 0, new IntWritable(200000)); row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00")); + value = new HiveDecimal("100000000000000000000"); + row.setFieldValue(2, value); writer.addRow(row); + Random rand = new Random(42); for(int i=1900; i < 2200; ++i) { row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." 
+ i)); if ((i & 1) == 0) { @@ -614,11 +627,18 @@ public void testUnionAndTimestamp() throws Exception { } else { union.set((byte) 1, new Text(new Integer(i*i).toString())); } + value = new HiveDecimal(new BigInteger(118, rand), + rand.nextInt(36)); + row.setFieldValue(2, value); + if (maxValue.compareTo(value) < 0) { + maxValue = value; + } writer.addRow(row); } // let's add a lot of constant rows to test the rle row.setFieldValue(0, null); union.set((byte) 0, new IntWritable(1732050807)); + row.setFieldValue(2, null); for(int i=0; i < 1000; ++i) { writer.addRow(row); } @@ -632,6 +652,12 @@ public void testUnionAndTimestamp() throws Exception { Reader reader = OrcFile.createReader(fs, testFilePath); assertEquals(false, reader.getMetadataKeys().iterator().hasNext()); assertEquals(1309, reader.getNumberOfRows()); + DecimalColumnStatistics stats = + (DecimalColumnStatistics) reader.getStatistics()[5]; + assertEquals(303, stats.getNumberOfValues()); + assertEquals(new HiveDecimal("-5643.234"), stats.getMinimum()); + assertEquals(maxValue, stats.getMaximum()); + assertEquals(null, stats.getSum()); int stripeCount = 0; int rowCount = 0; long currentOffset = -1; @@ -656,34 +682,42 @@ public void testUnionAndTimestamp() throws Exception { assertEquals(true, rows.hasNext()); row = (OrcStruct) rows.next(null); inspector = reader.getObjectInspector(); - assertEquals("struct>", + assertEquals("struct,decimal:decimal>", inspector.getTypeName()); assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), row.getFieldValue(0)); union = (OrcUnion) row.getFieldValue(1); assertEquals(0, union.getTag()); assertEquals(new IntWritable(42), union.getObject()); + assertEquals(new HiveDecimal("12345678.6547456"), row.getFieldValue(2)); row = (OrcStruct) rows.next(row); assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"), row.getFieldValue(0)); assertEquals(1, union.getTag()); assertEquals(new Text("hello"), union.getObject()); + assertEquals(new HiveDecimal("-5643.234"), row.getFieldValue(2)); row = (OrcStruct) rows.next(row); assertEquals(null, row.getFieldValue(0)); assertEquals(null, row.getFieldValue(1)); + assertEquals(null, row.getFieldValue(2)); row = (OrcStruct) rows.next(row); assertEquals(null, row.getFieldValue(0)); union = (OrcUnion) row.getFieldValue(1); assertEquals(0, union.getTag()); assertEquals(null, union.getObject()); + assertEquals(null, row.getFieldValue(2)); row = (OrcStruct) rows.next(row); assertEquals(null, row.getFieldValue(0)); assertEquals(1, union.getTag()); assertEquals(null, union.getObject()); + assertEquals(null, row.getFieldValue(2)); row = (OrcStruct) rows.next(row); assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"), row.getFieldValue(0)); assertEquals(new IntWritable(200000), union.getObject()); + assertEquals(new HiveDecimal("100000000000000000000"), + row.getFieldValue(2)); + rand = new Random(42); for(int i=1900; i < 2200; ++i) { row = (OrcStruct) rows.next(row); assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." 
+ i), @@ -695,6 +729,8 @@ public void testUnionAndTimestamp() throws Exception { assertEquals(1, union.getTag()); assertEquals(new Text(new Integer(i*i).toString()), union.getObject()); } + assertEquals(new HiveDecimal(new BigInteger(118, rand), + rand.nextInt(36)), row.getFieldValue(2)); } for(int i=0; i < 1000; ++i) { row = (OrcStruct) rows.next(row); @@ -709,6 +745,13 @@ public void testUnionAndTimestamp() throws Exception { assertEquals(false, rows.hasNext()); assertEquals(1.0, rows.getProgress(), 0.00001); assertEquals(reader.getNumberOfRows(), rows.getRowNumber()); + rows.seekToRow(1); + row = (OrcStruct) rows.next(row); + assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"), + row.getFieldValue(0)); + assertEquals(1, union.getTag()); + assertEquals(new Text("hello"), union.getObject()); + assertEquals(new HiveDecimal("-5643.234"), row.getFieldValue(2)); rows.close(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java index 3de26fc..7ba2036 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java @@ -21,17 +21,95 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.math.BigInteger; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; public class TestSerializationUtils { + private InputStream fromBuffer(ByteArrayOutputStream buffer) { + return new ByteArrayInputStream(buffer.toByteArray()); + } + @Test - public void TestDoubles() throws Exception { + public void testDoubles() throws Exception { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); SerializationUtils.writeDouble(buffer, 1343822337.759); assertEquals(1343822337.759, - SerializationUtils.readDouble(new - ByteArrayInputStream(buffer.toByteArray())), 0.0001); + SerializationUtils.readDouble(fromBuffer(buffer)), 0.0001); + } + + @Test + public void testBigIntegers() throws Exception { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0)); + assertArrayEquals(new byte[]{0}, buffer.toByteArray()); + assertEquals(0L, + SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue()); + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1)); + assertArrayEquals(new byte[]{2}, buffer.toByteArray()); + assertEquals(1L, + SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue()); + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1)); + assertArrayEquals(new byte[]{1}, buffer.toByteArray()); + assertEquals(-1L, + SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue()); + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50)); + assertArrayEquals(new byte[]{100}, buffer.toByteArray()); + assertEquals(50L, + SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue()); + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50)); + assertArrayEquals(new byte[]{99}, buffer.toByteArray()); + assertEquals(-50L, + SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue()); + for(int i=-8192; i < 8192; ++i) { + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i)); + assertEquals("compare length for " + i, + i >= -64 && i < 64 ? 
1 : 2, buffer.size()); + assertEquals("compare result for " + i, + i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue()); + } + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, + new BigInteger("123456789abcdef0",16)); + assertEquals(new BigInteger("123456789abcdef0",16), + SerializationUtils.readBigInteger(fromBuffer(buffer))); + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, + new BigInteger("-123456789abcdef0",16)); + assertEquals(new BigInteger("-123456789abcdef0",16), + SerializationUtils.readBigInteger(fromBuffer(buffer))); + StringBuilder buf = new StringBuilder(); + for(int i=0; i < 256; ++i) { + String num = Integer.toHexString(i); + if (num.length() == 1) { + buf.append('0'); + } + buf.append(num); + } + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, + new BigInteger(buf.toString(),16)); + assertEquals(new BigInteger(buf.toString(),16), + SerializationUtils.readBigInteger(fromBuffer(buffer))); + buffer.reset(); + SerializationUtils.writeBigInteger(buffer, + new BigInteger("ff000000000000000000000000000000000000000000ff",16)); + assertEquals( + new BigInteger("ff000000000000000000000000000000000000000000ff",16), + SerializationUtils.readBigInteger(fromBuffer(buffer))); + } + + public static void main(String[] args) throws Exception { + TestSerializationUtils test = new TestSerializationUtils(); + test.testDoubles(); + test.testBigIntegers(); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java index e33bdb2..4646a28 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java @@ -246,7 +246,7 @@ public static void main(String[] args) throws Exception { test.test2(); test.test3(); TestSerializationUtils serUtils = new TestSerializationUtils(); - serUtils.TestDoubles(); + serUtils.testDoubles(); TestDynamicArray test6 = new TestDynamicArray(); test6.testByteArray(); test6.testIntArray(); diff --git ql/src/test/queries/clientpositive/decimal_4.q ql/src/test/queries/clientpositive/decimal_4.q new file mode 100644 index 0000000..e8a89c1 --- /dev/null +++ ql/src/test/queries/clientpositive/decimal_4.q @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS DECIMAL_4_1; +DROP TABLE IF EXISTS DECIMAL_4_2; + +CREATE TABLE DECIMAL_4_1(key decimal, value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE; + +CREATE TABLE DECIMAL_4_2(key decimal, value decimal) +STORED AS ORC; + +LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_4_1; + +INSERT OVERWRITE TABLE DECIMAL_4_2 SELECT key, key * 3 FROM DECIMAL_4_1; + +SELECT * FROM DECIMAL_4_1 ORDER BY key, value; + +SELECT * FROM DECIMAL_4_2 ORDER BY key; + +DROP TABLE DECIMAL_4_1; +DROP TABLE DECIMAL_4_2; diff --git ql/src/test/results/clientpositive/decimal_4.q.out ql/src/test/results/clientpositive/decimal_4.q.out new file mode 100644 index 0000000..fc23e76 --- /dev/null +++ ql/src/test/results/clientpositive/decimal_4.q.out @@ -0,0 +1,158 @@ +PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_4_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_4_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_4_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_4_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE DECIMAL_4_1(key decimal, value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED 
AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DECIMAL_4_1(key decimal, value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DECIMAL_4_1 +PREHOOK: query: CREATE TABLE DECIMAL_4_2(key decimal, value decimal) +STORED AS ORC +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DECIMAL_4_2(key decimal, value decimal) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DECIMAL_4_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_4_1 +PREHOOK: type: LOAD +PREHOOK: Output: default@decimal_4_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_4_1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@decimal_4_1 +PREHOOK: query: INSERT OVERWRITE TABLE DECIMAL_4_2 SELECT key, key * 3 FROM DECIMAL_4_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_4_1 +PREHOOK: Output: default@decimal_4_2 +POSTHOOK: query: INSERT OVERWRITE TABLE DECIMAL_4_2 SELECT key, key * 3 FROM DECIMAL_4_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_4_1 +POSTHOOK: Output: default@decimal_4_2 +POSTHOOK: Lineage: decimal_4_2.key SIMPLE [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +POSTHOOK: Lineage: decimal_4_2.value EXPRESSION [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +PREHOOK: query: SELECT * FROM DECIMAL_4_1 ORDER BY key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_4_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DECIMAL_4_1 ORDER BY key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_4_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_4_2.key SIMPLE [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +POSTHOOK: Lineage: decimal_4_2.value EXPRESSION [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +-1234567890.123456789 -1234567890 +-4.4E+3 4400 +-1255.49 -1255 +-1.122 -11 +-1.12 -1 +-1.12 -1 +-0.333 0 +-0.33 0 +-0.3 0 +0 0 +0 0 +1E-99 0 +0.01 0 +0.02 0 +0.1 0 +0.2 0 +0.3 0 +0.33 0 +0.333 0 +0.9999999999999999999999999 1 +1 1 +1 1 +1.12 1 +1.122 1 +2 2 +2 2 +3.14 3 +3.14 3 +3.14 3 +3.14 4 +1E+1 10 +2E+1 20 +1E+2 100 +124 124 +125.2 125 +2E+2 200 +1234567890.12345678 1234567890 +1E+99 0 +PREHOOK: query: SELECT * FROM DECIMAL_4_2 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_4_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DECIMAL_4_2 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_4_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_4_2.key SIMPLE [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +POSTHOOK: Lineage: decimal_4_2.value EXPRESSION [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +-1234567890.123456789 -3703703670.370370367 +-4.4E+3 -1.32E+4 +-1255.49 -3766.47 +-1.122 -3.366 +-1.12 -3.36 +-1.12 -3.36 +-0.333 -0.999 +-0.33 -0.99 +-0.3 -0.9 +0 0 +0 0 +1E-99 3E-99 +0.01 0.03 +0.02 0.06 +0.1 0.3 +0.2 0.6 +0.3 0.9 +0.33 0.99 +0.333 0.999 +0.9999999999999999999999999 2.9999999999999999999999997 +1 3 +1 3 +1.12 3.36 +1.122 3.366 +2 6 +2 6 +3.14 9.42 +3.14 9.42 +3.14 9.42 +3.14 9.42 +1E+1 3E+1 +2E+1 6E+1 +1E+2 3E+2 +124 372 +125.2 375.6 +2E+2 6E+2 +1234567890.12345678 3703703670.37037034 +1E+99 3E+99 +PREHOOK: query: DROP TABLE DECIMAL_4_1 +PREHOOK: type: DROPTABLE +PREHOOK: 
Input: default@decimal_4_1 +PREHOOK: Output: default@decimal_4_1 +POSTHOOK: query: DROP TABLE DECIMAL_4_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_4_1 +POSTHOOK: Output: default@decimal_4_1 +POSTHOOK: Lineage: decimal_4_2.key SIMPLE [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +POSTHOOK: Lineage: decimal_4_2.value EXPRESSION [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +PREHOOK: query: DROP TABLE DECIMAL_4_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_4_2 +PREHOOK: Output: default@decimal_4_2 +POSTHOOK: query: DROP TABLE DECIMAL_4_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_4_2 +POSTHOOK: Output: default@decimal_4_2 +POSTHOOK: Lineage: decimal_4_2.key SIMPLE [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ] +POSTHOOK: Lineage: decimal_4_2.value EXPRESSION [(decimal_4_1)decimal_4_1.FieldSchema(name:key, type:decimal, comment:null), ]
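
Note on the wire format: the new SerializationUtils.writeBigInteger/readBigInteger pair documents its encoding as zigzag (the low bit carries the sign) followed by base-128 varint bytes, 7 data bits per byte with the high bit as a continuation flag, least-significant byte first. The sketch below is not part of the patch; it re-implements that scheme for plain longs only, so the single-byte expectations asserted in TestSerializationUtils.testBigIntegers (0 -> 0, 1 -> 2, -1 -> 1, 50 -> 100, -50 -> 99) can be checked by hand.

import java.io.ByteArrayOutputStream;

// Hypothetical standalone helper, not from the patch: zigzag + base-128 varint
// encoding of a long, mirroring the format documented on writeBigInteger.
public class ZigZagVIntSketch {
  static byte[] encode(long value) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // zigzag: fold the sign into the low bit so small magnitudes stay small
    long zz = (value << 1) ^ (value >> 63);
    // emit 7 bits per byte, LSB first; high bit set means "more bytes follow"
    while ((zz & ~0x7fL) != 0) {
      out.write((int) (0x80 | (zz & 0x7f)));
      zz >>>= 7;
    }
    out.write((int) zz);
    return out.toByteArray();
  }

  public static void main(String[] args) {
    // single-byte cases asserted in TestSerializationUtils.testBigIntegers
    for (long v : new long[]{0, 1, -1, 50, -50}) {
      System.out.println(v + " encodes to byte " + encode(v)[0]);
    }
  }
}

The zigzag step is what keeps small negative unscaled values down to a single byte, which matters because every decimal value written by DecimalTreeWriter puts one of these varints into its DATA stream.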
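
Note on the column layout: DecimalTreeWriter splits each decimal into two streams, writing the unscaled digits to the DATA stream via writeBigInteger and the scale to the SECONDARY stream through a signed RunLengthIntegerWriter, and DecimalTreeReader rebuilds the value with new HiveDecimal(unscaledValue, scale). The sketch below is illustrative only; java.math.BigDecimal stands in for HiveDecimal so it runs without Hive on the classpath, and the sample value is the one written in testUnionAndTimestamp.

import java.math.BigDecimal;
import java.math.BigInteger;

// Illustrative only: BigDecimal stands in for HiveDecimal. The split and
// recombine mirror what DecimalTreeWriter and DecimalTreeReader do with the
// DATA and SECONDARY streams.
public class DecimalStreamSplitSketch {
  public static void main(String[] args) {
    BigDecimal original = new BigDecimal("12345678.6547456");

    // writer side: one piece per stream
    BigInteger unscaled = original.unscaledValue(); // goes to DATA via writeBigInteger
    int scale = original.scale();                   // goes to SECONDARY via the signed RLE writer

    // reader side: recombine the two pieces, as the DecimalTreeReader does
    BigDecimal restored = new BigDecimal(unscaled, scale);
    System.out.println(unscaled + " with scale " + scale + " -> " + restored);
  }
}

Since most decimal columns use only a handful of distinct scales, the run-length encoder on the SECONDARY stream should compress that side of the split very well.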