diff --git a/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java b/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java index f4cc240..a0336d2 100644 --- a/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java +++ b/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java @@ -17765,6 +17765,9 @@ public Builder clearRowIndexStride() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ boolean hasWriterVersion(); @@ -17775,6 +17778,9 @@ public Builder clearRowIndexStride() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ int getWriterVersion(); @@ -18077,6 +18083,9 @@ public long getMetadataLength() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ public boolean hasWriterVersion() { @@ -18089,6 +18098,9 @@ public boolean hasWriterVersion() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ public int getWriterVersion() { @@ -18759,6 +18771,9 @@ public Builder clearMetadataLength() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ public boolean hasWriterVersion() { @@ -18771,6 +18786,9 @@ public boolean hasWriterVersion() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ public int getWriterVersion() { @@ -18783,6 +18801,9 @@ public int getWriterVersion() { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ public Builder setWriterVersion(int value) { @@ -18798,6 +18819,9 @@ public Builder setWriterVersion(int value) { * Version of the writer: * 0 (or missing) = original * 1 = HIVE-8732 fixed + * 2 = HIVE-4243 (only in Hive 2.x) + * 3 = HIVE-12055 (only in Hive 2.x) + * 4 = HIVE-13083 (fixed) * */ public Builder clearWriterVersion() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index b13dad2..3c37f0c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -112,7 +112,10 @@ public int getMinor() { */ public static enum WriterVersion { ORIGINAL(0), - HIVE_8732(1); // corrupted stripe/file maximum column statistics + HIVE_8732(1), // corrupted stripe/file maximum column statistics + HIVE_4243(2), // use real column names from Hive tables (Hive 2.x only) + HIVE_12055(3), // vectorized writer (Hive 2.x only) + HIVE_13083(4); // decimal writer updating present stream wrongly private final int id; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index b5e6ad1..b3a256f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -1689,13 +1689,15 @@ void recordPosition(PositionRecorder recorder) throws IOException { @Override void write(Object obj) throws IOException { - super.write(obj); + HiveDecimal decimal = null; if (obj != null) { - HiveDecimal decimal = ((HiveDecimalObjectInspector) inspector). - getPrimitiveJavaObject(obj); + decimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(obj); if (decimal == null) { - return; + obj = null; } + } + super.write(obj); + if (obj != null) { SerializationUtils.writeBigInteger(valueStream, decimal.unscaledValue()); scaleStream.write(decimal.scale()); @@ -2386,7 +2388,7 @@ private int writePostScript(int footerLength, int metadataLength) throws IOExcep .setMagic(OrcFile.MAGIC) .addVersion(version.getMajor()) .addVersion(version.getMinor()) - .setWriterVersion(OrcFile.WriterVersion.HIVE_8732.getId()); + .setWriterVersion(OrcFile.WriterVersion.HIVE_13083.getId()); if (compress != CompressionKind.NONE) { builder.setCompressionBlockSize(bufferSize); } diff --git a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto index 3b7a9b3..06d0b07 100644 --- a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto +++ b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto @@ -213,6 +213,9 @@ message PostScript { // Version of the writer: // 0 (or missing) = original // 1 = HIVE-8732 fixed + // 2 = HIVE-4243 (only in Hive 2.x) + // 3 = HIVE-12055 (only in Hive 2.x) + // 4 = HIVE-13083 (fixed) optional uint32 writerVersion = 6; // Leave this last in the record optional string magic = 8000; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 6620a66..137c39f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -25,6 +25,7 @@ import java.io.File; import java.io.IOException; +import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; import java.sql.Date; @@ -92,6 +93,14 @@ @RunWith(value = Parameterized.class) public class TestOrcFile { + public static class DecimalStruct { + HiveDecimalWritable dec; + + DecimalStruct(HiveDecimalWritable hdw) { + this.dec = hdw; + } + } + public static class SimpleStruct { BytesWritable bytes1; Text string1; @@ -525,6 +534,42 @@ public void testTimestamp() throws Exception { } @Test + public void testHiveDecimalAllNulls() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (DecimalStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + // this is an invalid decimal value, getting HiveDecimal from it will return null + writer.addRow(new DecimalStruct(new HiveDecimalWritable("1.463040009E9".getBytes(), 8))); + writer.addRow(new DecimalStruct(null)); + writer.close(); + + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + StructObjectInspector readerInspector = + (StructObjectInspector) reader.getObjectInspector(); + List fields = readerInspector.getAllStructFieldRefs(); + HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector. + getStructFieldRef("dec").getFieldObjectInspector(); + RecordReader rows = reader.rows(null); + while (rows.hasNext()) { + Object row = rows.next(null); + assertEquals(null, doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(0)))); + } + + // check the stats + ColumnStatistics[] stats = reader.getStatistics(); + assertEquals(2, stats[0].getNumberOfValues()); + assertEquals(0, stats[1].getNumberOfValues()); + assertEquals(true, stats[1].hasNull()); + } + + @Test public void testStringAndBinaryStatistics() throws Exception { ObjectInspector inspector; diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter.out b/ql/src/test/resources/orc-file-dump-bloomfilter.out index ed47156..18fd2fb 100644 --- a/ql/src/test/resources/orc-file-dump-bloomfilter.out +++ b/ql/src/test/resources/orc-file-dump-bloomfilter.out @@ -1,5 +1,5 @@ Structure for TestFileDump.testDump.orc -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 21000 Compression: ZLIB Compression size: 4096 diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter2.out b/ql/src/test/resources/orc-file-dump-bloomfilter2.out index f8a21d0..fa5cc2d 100644 --- a/ql/src/test/resources/orc-file-dump-bloomfilter2.out +++ b/ql/src/test/resources/orc-file-dump-bloomfilter2.out @@ -1,5 +1,5 @@ Structure for TestFileDump.testDump.orc -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 21000 Compression: ZLIB Compression size: 4096 diff --git a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out index 1d0d583..17a964b 100644 --- a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out +++ b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out @@ -1,5 +1,5 @@ Structure for TestFileDump.testDump.orc -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 21000 Compression: ZLIB Compression size: 4096 diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json index 99ea7da..68fc6c9 100644 --- a/ql/src/test/resources/orc-file-dump.json +++ b/ql/src/test/resources/orc-file-dump.json @@ -1,7 +1,7 @@ { "fileName": "TestFileDump.testDump.orc", "fileVersion": "0.12", - "writerVersion": "HIVE_8732", + "writerVersion": "HIVE_13083", "numberOfRows": 21000, "compression": "ZLIB", "compressionBufferSize": 4096, diff --git a/ql/src/test/resources/orc-file-dump.out b/ql/src/test/resources/orc-file-dump.out index c6c0955..70f7fbd 100644 --- a/ql/src/test/resources/orc-file-dump.out +++ b/ql/src/test/resources/orc-file-dump.out @@ -1,5 +1,5 @@ Structure for TestFileDump.testDump.orc -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 21000 Compression: ZLIB Compression size: 4096 diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out index 8975c37..e98a73f 100644 --- a/ql/src/test/resources/orc-file-has-null.out +++ b/ql/src/test/resources/orc-file-has-null.out @@ -1,5 +1,5 @@ Structure for TestOrcFile.testHasNull.orc -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 20000 Compression: ZLIB Compression size: 4096 diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out index c741eda..83a54bc 100644 --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out @@ -93,7 +93,7 @@ PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 1049 Compression: ZLIB Compression size: 262144 @@ -213,7 +213,7 @@ PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 1049 Compression: ZLIB Compression size: 262144 @@ -345,7 +345,7 @@ PREHOOK: Input: default@orc_ppd_part@ds=2015/hr=10 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 1049 Compression: ZLIB Compression size: 262144 diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out index d16ca18..7dbeea0 100644 --- a/ql/src/test/results/clientpositive/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -517,7 +517,7 @@ PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 242 Compression: SNAPPY Compression size: 4096 @@ -579,7 +579,7 @@ PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 242 Compression: SNAPPY Compression size: 4096 diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out index 0b9d973..27e3c1d 100644 --- a/ql/src/test/results/clientpositive/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/orc_merge11.q.out @@ -72,7 +72,7 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 50000 Compression: ZLIB Compression size: 4096 @@ -133,7 +133,7 @@ ________________________________________________________________________________ -- END ORC FILE DUMP -- -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 50000 Compression: ZLIB Compression size: 4096 @@ -217,7 +217,7 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 100000 Compression: ZLIB Compression size: 4096 diff --git a/ql/src/test/results/clientpositive/tez/orc_merge10.q.out b/ql/src/test/results/clientpositive/tez/orc_merge10.q.out index bb9d9da..a075fe3 100644 --- a/ql/src/test/results/clientpositive/tez/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/tez/orc_merge10.q.out @@ -552,7 +552,7 @@ PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 242 Compression: SNAPPY Compression size: 4096 @@ -629,7 +629,7 @@ PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 242 Compression: SNAPPY Compression size: 4096 diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out index 0b9d973..27e3c1d 100644 --- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out @@ -72,7 +72,7 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 50000 Compression: ZLIB Compression size: 4096 @@ -133,7 +133,7 @@ ________________________________________________________________________________ -- END ORC FILE DUMP -- -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 50000 Compression: ZLIB Compression size: 4096 @@ -217,7 +217,7 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_8732 +File Version: 0.12 with HIVE_13083 Rows: 100000 Compression: ZLIB Compression size: 4096