Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (revision ) @@ -98,6 +98,12 @@ public void seek(long desired) { for(int i = 0; i < bytes.length; ++i) { + if (desired == 0 && bytes[i].remaining() == 0) { + if (LOG.isWarnEnabled()) { + LOG.warn("Attempting seek into empty stream (" + name + ") Skipping stream."); + } + return; + } if (offsets[i] <= desired && desired - offsets[i] < bytes[i].remaining()) { currentOffset = desired; Index: ql/src/test/resources/orc-file-has-null.out IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- ql/src/test/resources/orc-file-has-null.out (revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90) +++ ql/src/test/resources/orc-file-has-null.out (revision ) @@ -48,7 +48,7 @@ Entry 2:count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488 Entry 3:count: 0 hasNull: true positions: 0,4,125,0,0,136,488 Entry 4:count: 0 hasNull: true positions: 0,6,125,0,0,136,488 - Stripe: offset: 424 data: 156 rows: 5000 tail: 60 index: 119 + Stripe: offset: 424 data: 156 rows: 5000 tail: 59 index: 119 Stream: column 0 section ROW_INDEX start: 424 length 17 Stream: column 1 section ROW_INDEX start: 441 length 63 Stream: column 2 section ROW_INDEX start: 504 length 39 @@ -57,7 +57,6 @@ Stream: column 2 section PRESENT start: 688 length 11 Stream: column 2 section DATA start: 699 length 0 Stream: column 2 section LENGTH start: 699 length 0 - Stream: column 2 section DICTIONARY_DATA start: 699 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: 
DICTIONARY_V2[0] @@ -67,15 +66,15 @@ Entry 2:count: 0 hasNull: true positions: 0,2,120,0,0,0,0 Entry 3:count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4:count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - Stripe: offset: 759 data: 186 rows: 5000 tail: 60 index: 148 - Stream: column 0 section ROW_INDEX start: 759 length 17 - Stream: column 1 section ROW_INDEX start: 776 length 63 - Stream: column 2 section ROW_INDEX start: 839 length 68 - Stream: column 1 section DATA start: 907 length 113 - Stream: column 1 section LENGTH start: 1020 length 32 - Stream: column 2 section DATA start: 1052 length 24 - Stream: column 2 section LENGTH start: 1076 length 6 - Stream: column 2 section DICTIONARY_DATA start: 1082 length 11 + Stripe: offset: 758 data: 186 rows: 5000 tail: 60 index: 148 + Stream: column 0 section ROW_INDEX start: 758 length 17 + Stream: column 1 section ROW_INDEX start: 775 length 63 + Stream: column 2 section ROW_INDEX start: 838 length 68 + Stream: column 1 section DATA start: 906 length 113 + Stream: column 1 section LENGTH start: 1019 length 32 + Stream: column 2 section DATA start: 1051 length 24 + Stream: column 2 section LENGTH start: 1075 length 6 + Stream: column 2 section DICTIONARY_DATA start: 1081 length 11 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[1] @@ -85,16 +84,15 @@ Entry 2:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464 Entry 3:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440 Entry 4:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416 - Stripe: offset: 1153 data: 156 rows: 5000 tail: 60 index: 119 - Stream: column 0 section ROW_INDEX start: 1153 length 17 - Stream: column 1 section ROW_INDEX start: 1170 length 63 - Stream: column 2 section ROW_INDEX start: 1233 length 39 - Stream: column 1 section DATA start: 1272 length 113 - Stream: column 1 section LENGTH start: 1385 length 32 - 
Stream: column 2 section PRESENT start: 1417 length 11 - Stream: column 2 section DATA start: 1428 length 0 - Stream: column 2 section LENGTH start: 1428 length 0 - Stream: column 2 section DICTIONARY_DATA start: 1428 length 0 + Stripe: offset: 1152 data: 156 rows: 5000 tail: 59 index: 119 + Stream: column 0 section ROW_INDEX start: 1152 length 17 + Stream: column 1 section ROW_INDEX start: 1169 length 63 + Stream: column 2 section ROW_INDEX start: 1232 length 39 + Stream: column 1 section DATA start: 1271 length 113 + Stream: column 1 section LENGTH start: 1384 length 32 + Stream: column 2 section PRESENT start: 1416 length 11 + Stream: column 2 section DATA start: 1427 length 0 + Stream: column 2 section LENGTH start: 1427 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ -105,6 +103,6 @@ Entry 3:count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4:count: 0 hasNull: true positions: 0,6,110,0,0,0,0 -File length: 1736 bytes +File length: 1735 bytes Padding length: 0 bytes Padding ratio: 0% Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (revision ) @@ -1557,15 +1557,17 @@ StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DICTIONARY_DATA); InStream in = streams.get(name); + if (in != null) { // Guard against empty dictionary stream. - if (in.available() > 0) { - dictionaryBuffer = new DynamicByteArray(64, in.available()); - dictionaryBuffer.readAll(in); - // Since its start of strip invalidate the cache. 
- dictionaryBufferInBytesCache = null; + if (in.available() > 0) { + dictionaryBuffer = new DynamicByteArray(64, in.available()); + dictionaryBuffer.readAll(in); + // Since it's the start of a stripe, invalidate the cache. + dictionaryBufferInBytesCache = null; + } + in.close(); } else { dictionaryBuffer = null; } - in.close(); // read the lengths name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH); Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (revision ) @@ -1162,6 +1162,13 @@ // Write the dictionary by traversing the red-black tree writing out // the bytes and lengths; and creating the map from the original order // to the final sorted order. + if (dictionary.size() == 0) { + if (LOG.isWarnEnabled()) { + LOG.warn("Empty dictionary. Suppressing dictionary stream."); + } + stringOutput.suppress(); + } + dictionary.visit(new StringRedBlackTree.Visitor() { private int currentId = 0; @Override