diff --git a/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java b/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java index b1577a3..24715c3 100644 --- a/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java +++ b/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java @@ -17766,6 +17766,8 @@ public Builder clearRowIndexStride() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ boolean hasWriterVersion(); @@ -17777,6 +17779,8 @@ public Builder clearRowIndexStride() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ int getWriterVersion(); @@ -18080,6 +18084,8 @@ public long getMetadataLength() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ public boolean hasWriterVersion() { @@ -18093,6 +18099,8 @@ public boolean hasWriterVersion() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ public int getWriterVersion() { @@ -18764,6 +18772,8 @@ public Builder clearMetadataLength() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ public boolean hasWriterVersion() { @@ -18777,6 +18787,8 @@ public boolean hasWriterVersion() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ public int getWriterVersion() { @@ -18790,6 +18802,8 @@ public int getWriterVersion() { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = HIVE-13083 fixed * */ public Builder setWriterVersion(int value) { @@ -18806,6 +18820,8 @@ public Builder setWriterVersion(int value) { * 0 (or missing) = original * 1 = HIVE-8732 fixed * 2 = HIVE-4243 fixed + * 3 = HIVE-12055 fixed + * 4 = 
HIVE-13083 fixed * */ public Builder clearWriterVersion() { diff --git a/orc/src/java/org/apache/orc/OrcFile.java b/orc/src/java/org/apache/orc/OrcFile.java index 98226f9..56c4dda 100644 --- a/orc/src/java/org/apache/orc/OrcFile.java +++ b/orc/src/java/org/apache/orc/OrcFile.java @@ -105,6 +105,7 @@ public int getMinor() { HIVE_8732(1), // corrupted stripe/file maximum column statistics HIVE_4243(2), // use real column names from Hive tables HIVE_12055(3), // vectorized writer + HIVE_13083(4), // decimal writer updating present stream wrongly // Don't use any magic numbers here except for the below: FUTURE(Integer.MAX_VALUE); // a version from a future writer @@ -142,7 +143,7 @@ public static WriterVersion from(int val) { return values[val]; } } - public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_12055; + public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_13083; public enum EncodingStrategy { SPEED, COMPRESSION diff --git a/orc/src/protobuf/orc_proto.proto b/orc/src/protobuf/orc_proto.proto index 0b36794..f4935b4 100644 --- a/orc/src/protobuf/orc_proto.proto +++ b/orc/src/protobuf/orc_proto.proto @@ -214,6 +214,8 @@ message PostScript { // 0 (or missing) = original // 1 = HIVE-8732 fixed // 2 = HIVE-4243 fixed + // 3 = HIVE-12055 fixed + // 4 = HIVE-13083 fixed optional uint32 writerVersion = 6; // Leave this last in the record optional string magic = 8000; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index a7e657c..9b89ba8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -25,6 +25,7 @@ import java.io.File; import java.io.IOException; +import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; import java.sql.Date; @@ -108,6 +109,14 @@ @RunWith(value = Parameterized.class) public class TestOrcFile { + public 
static class DecimalStruct { + HiveDecimalWritable dec; + + DecimalStruct(HiveDecimalWritable hdw) { + this.dec = hdw; + } + } + public static class SimpleStruct { BytesWritable bytes1; Text string1; @@ -541,6 +550,109 @@ public void testTimestamp() throws Exception { } @Test + public void testHiveDecimalAllNulls() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (DecimalStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + // this is an invalid decimal value, getting HiveDecimal from it will return null + writer.addRow(new DecimalStruct(new HiveDecimalWritable("1.463040009E9".getBytes(), 8))); + writer.addRow(new DecimalStruct(null)); + writer.close(); + + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + StructObjectInspector readerInspector = + (StructObjectInspector) reader.getObjectInspector(); + List fields = readerInspector.getAllStructFieldRefs(); + HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector. 
+ getStructFieldRef("dec").getFieldObjectInspector(); + RecordReader rows = reader.rows(null); + while (rows.hasNext()) { + Object row = rows.next(null); + assertEquals(null, doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(0)))); + } + + // check the stats + ColumnStatistics[] stats = reader.getStatistics(); + assertEquals(2, stats[0].getNumberOfValues()); + assertEquals(0, stats[1].getNumberOfValues()); + } + + @Test + public void testHiveDecimalIsNullReset() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (DecimalStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + + // orc creates 1000 batch size to make memory check align with 5000 instead of 5120 + for (int i = 0; i < 1000; i++) { + writer.addRow(new DecimalStruct(null)); + } + + writer.addRow(new DecimalStruct(new HiveDecimalWritable("1.00"))); + writer.addRow(new DecimalStruct(new HiveDecimalWritable("2.00"))); + writer.addRow(new DecimalStruct(new HiveDecimalWritable("3.00"))); + + writer.close(); + + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + StructObjectInspector readerInspector = + (StructObjectInspector) reader.getObjectInspector(); + List fields = readerInspector.getAllStructFieldRefs(); + HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector. 
+ getStructFieldRef("dec").getFieldObjectInspector(); + RecordReader rows = reader.rows(null); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + if (idx < 1000) { + assertEquals(null, doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(0)))); + } + + if (idx == 1000) { + assertEquals(new HiveDecimalWritable(1), + doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(0)))); + } + + if (idx == 1001) { + assertEquals(new HiveDecimalWritable(2), + doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(0)))); + } + + if (idx == 1002) { + assertEquals(new HiveDecimalWritable(3), + doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(0)))); + } + idx++; + } + + // check the stats + ColumnStatistics[] stats = reader.getStatistics(); + assertEquals(1003, stats[0].getNumberOfValues()); + assertEquals(3, stats[1].getNumberOfValues()); + assertEquals(HiveDecimal.create(3), ((DecimalColumnStatistics) stats[1]).getMaximum()); + assertEquals(HiveDecimal.create(1), ((DecimalColumnStatistics) stats[1]).getMinimum()); + assertEquals(HiveDecimal.create(6), ((DecimalColumnStatistics) stats[1]).getSum()); + assertEquals(true, ((DecimalColumnStatistics) stats[1]).hasNull()); + } + + @Test public void testStringAndBinaryStatistics() throws Exception { ObjectInspector inspector; diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index fe8ad85..272fd59 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -108,12 +108,18 @@ public void stringifyValue(StringBuilder buffer, int row) { } public void set(int elementNum, HiveDecimalWritable writeable) { - HiveDecimal hiveDec =
writeable.getHiveDecimal(precision, scale); - if (hiveDec == null) { + if (writeable == null) { noNulls = false; isNull[elementNum] = true; } else { - vector[elementNum].set(hiveDec); + HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale); + if (hiveDec == null) { + noNulls = false; + isNull[elementNum] = true; + } else { + vector[elementNum].set(hiveDec); + isNull[elementNum] = false; + } } } @@ -124,6 +130,7 @@ public void set(int elementNum, HiveDecimal hiveDec) { isNull[elementNum] = true; } else { vector[elementNum].set(checkedDec); + isNull[elementNum] = false; } }