Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java	(revision )
@@ -98,6 +98,12 @@
 
     public void seek(long desired) {
       for(int i = 0; i < bytes.length; ++i) {
+        if (desired == 0 && bytes[i].remaining() == 0) {
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Attempting seek into empty stream (" + name + "). Skipping stream.");
+          }
+          return;
+        }
         if (offsets[i] <= desired &&
             desired - offsets[i] < bytes[i].remaining()) {
           currentOffset = desired;
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java	(revision )
@@ -31,7 +31,7 @@
   static final int MIN_DELTA = -128;
   static final int MAX_LITERAL_SIZE = 128;
   private static final int MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
-  private final PositionedOutputStream output;
+  private final OutStream output;
   private final boolean signed;
   private final long[] literals = new long[MAX_LITERAL_SIZE];
   private int numLiterals = 0;
@@ -40,7 +40,7 @@
   private int tailRunLength = 0;
   private SerializationUtils utils;
 
-  RunLengthIntegerWriter(PositionedOutputStream output,
+  RunLengthIntegerWriter(OutStream output,
                          boolean signed) {
     this.output = output;
     this.signed = signed;
@@ -132,6 +132,11 @@
         }
       }
     }
+  }
+
+  @Override
+  public void suppress() {
+    this.output.suppress();
   }
 
   @Override
Index: ql/src/test/resources/orc-file-has-null.out
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/test/resources/orc-file-has-null.out	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/test/resources/orc-file-has-null.out	(revision )
@@ -48,7 +48,7 @@
     Entry 2:count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
     Entry 3:count: 0 hasNull: true positions: 0,4,125,0,0,136,488
     Entry 4:count: 0 hasNull: true positions: 0,6,125,0,0,136,488
-  Stripe: offset: 424 data: 156 rows: 5000 tail: 60 index: 119
+  Stripe: offset: 424 data: 156 rows: 5000 tail: 55 index: 119
     Stream: column 0 section ROW_INDEX start: 424 length 17
     Stream: column 1 section ROW_INDEX start: 441 length 63
     Stream: column 2 section ROW_INDEX start: 504 length 39
@@ -56,8 +56,6 @@
     Stream: column 1 section LENGTH start: 656 length 32
     Stream: column 2 section PRESENT start: 688 length 11
     Stream: column 2 section DATA start: 699 length 0
-    Stream: column 2 section LENGTH start: 699 length 0
-    Stream: column 2 section DICTIONARY_DATA start: 699 length 0
    Encoding column 0: DIRECT
    Encoding column 1: DIRECT_V2
    Encoding column 2: DICTIONARY_V2[0]
@@ -67,15 +65,15 @@
     Entry 2:count: 0 hasNull: true positions: 0,2,120,0,0,0,0
     Entry 3:count: 0 hasNull: true positions: 0,4,115,0,0,0,0
     Entry 4:count: 0 hasNull: true positions: 0,6,110,0,0,0,0
-  Stripe: offset: 759 data: 186 rows: 5000 tail: 60 index: 148
-    Stream: column 0 section ROW_INDEX start: 759 length 17
-    Stream: column 1 section ROW_INDEX start: 776 length 63
-    Stream: column 2 section ROW_INDEX start: 839 length 68
-    Stream: column 1 section DATA start: 907 length 113
-    Stream: column 1 section LENGTH start: 1020 length 32
-    Stream: column 2 section DATA start: 1052 length 24
-    Stream: column 2 section LENGTH start: 1076 length 6
-    Stream: column 2 section DICTIONARY_DATA start: 1082 length 11
+  Stripe: offset: 754 data: 186 rows: 5000 tail: 60 index: 148
+    Stream: column 0 section ROW_INDEX start: 754 length 17
+    Stream: column 1 section ROW_INDEX start: 771 length 63
+    Stream: column 2 section ROW_INDEX start: 834 length 68
+    Stream: column 1 section DATA start: 902 length 113
+    Stream: column 1 section LENGTH start: 1015 length 32
+    Stream: column 2 section DATA start: 1047 length 24
+    Stream: column 2 section LENGTH start: 1071 length 6
+    Stream: column 2 section DICTIONARY_DATA start: 1077 length 11
    Encoding column 0: DIRECT
    Encoding column 1: DIRECT_V2
    Encoding column 2: DICTIONARY_V2[1]
@@ -85,16 +83,14 @@
     Entry 2:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
     Entry 3:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
     Entry 4:count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
-  Stripe: offset: 1153 data: 156 rows: 5000 tail: 60 index: 119
-    Stream: column 0 section ROW_INDEX start: 1153 length 17
-    Stream: column 1 section ROW_INDEX start: 1170 length 63
-    Stream: column 2 section ROW_INDEX start: 1233 length 39
-    Stream: column 1 section DATA start: 1272 length 113
-    Stream: column 1 section LENGTH start: 1385 length 32
-    Stream: column 2 section PRESENT start: 1417 length 11
-    Stream: column 2 section DATA start: 1428 length 0
-    Stream: column 2 section LENGTH start: 1428 length 0
-    Stream: column 2 section DICTIONARY_DATA start: 1428 length 0
+  Stripe: offset: 1148 data: 156 rows: 5000 tail: 55 index: 119
+    Stream: column 0 section ROW_INDEX start: 1148 length 17
+    Stream: column 1 section ROW_INDEX start: 1165 length 63
+    Stream: column 2 section ROW_INDEX start: 1228 length 39
+    Stream: column 1 section DATA start: 1267 length 113
+    Stream: column 1 section LENGTH start: 1380 length 32
+    Stream: column 2 section PRESENT start: 1412 length 11
+    Stream: column 2 section DATA start: 1423 length 0
    Encoding column 0: DIRECT
    Encoding column 1: DIRECT_V2
    Encoding column 2: DICTIONARY_V2[0]
@@ -105,6 +101,6 @@
     Entry 3:count: 0 hasNull: true positions: 0,4,115,0,0,0,0
     Entry 4:count: 0 hasNull: true positions: 0,6,110,0,0,0,0
 
-File length: 1736 bytes
+File length: 1728 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java	(revision )
@@ -1557,32 +1557,36 @@
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DICTIONARY_DATA);
       InStream in = streams.get(name);
+      if (in != null) { // Guard against empty dictionary stream.
-      if (in.available() > 0) {
-        dictionaryBuffer = new DynamicByteArray(64, in.available());
-        dictionaryBuffer.readAll(in);
-        // Since its start of strip invalidate the cache.
-        dictionaryBufferInBytesCache = null;
+        if (in.available() > 0) {
+          dictionaryBuffer = new DynamicByteArray(64, in.available());
+          dictionaryBuffer.readAll(in);
+          // Since it's the start of a stripe, invalidate the cache.
+          dictionaryBufferInBytesCache = null;
+        }
+        in.close();
       } else {
         dictionaryBuffer = null;
       }
-      in.close();
 
       // read the lengths
       name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
      in = streams.get(name);
+      if (in != null) { // Guard against empty LENGTH stream.
-      IntegerReader lenReader = createIntegerReader(encodings.get(columnId)
-          .getKind(), in, false);
-      int offset = 0;
-      if (dictionaryOffsets == null ||
-          dictionaryOffsets.length < dictionarySize + 1) {
-        dictionaryOffsets = new int[dictionarySize + 1];
-      }
+        IntegerReader lenReader = createIntegerReader(encodings.get(columnId)
+            .getKind(), in, false);
+        int offset = 0;
+        if (dictionaryOffsets == null ||
+            dictionaryOffsets.length < dictionarySize + 1) {
+          dictionaryOffsets = new int[dictionarySize + 1];
+        }
-      for(int i=0; i < dictionarySize; ++i) {
+        for (int i = 0; i < dictionarySize; ++i) {
-        dictionaryOffsets[i] = offset;
-        offset += (int) lenReader.next();
-      }
-      dictionaryOffsets[dictionarySize] = offset;
-      in.close();
+          dictionaryOffsets[i] = offset;
+          offset += (int) lenReader.next();
+        }
+        dictionaryOffsets[dictionarySize] = offset;
+        in.close();
+      }
 
       // set up the row reader
       name = new StreamName(columnId, OrcProto.Stream.Kind.DATA);
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java	(revision )
@@ -630,7 +630,7 @@
       return rowIndexEntry;
     }
 
-    IntegerWriter createIntegerWriter(PositionedOutputStream output,
+    IntegerWriter createIntegerWriter(OutStream output,
                                       boolean signed, boolean isDirectV2,
                                       StreamFactory writer) {
       if (isDirectV2) {
@@ -882,7 +882,7 @@
                       StreamFactory writer,
                       boolean nullable) throws IOException {
       super(columnId, inspector, writer, nullable);
-      PositionedOutputStream out = writer.createStream(id,
+      OutStream out = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.isDirectV2 = isNewWriteFormat(writer);
       this.writer = createIntegerWriter(out, true, isDirectV2, writer);
@@ -1162,6 +1162,14 @@
       // Write the dictionary by traversing the red-black tree writing out
       // the bytes and lengths; and creating the map from the original order
       // to the final sorted order.
+      if (dictionary.size() == 0) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("Empty dictionary. Suppressing dictionary streams.");
+        }
+        stringOutput.suppress();
+        lengthOutput.suppress();
+      }
+
       dictionary.visit(new StringRedBlackTree.Visitor() {
         private int currentId = 0;
         @Override
@@ -1467,7 +1475,7 @@
                       StreamFactory writer,
                       boolean nullable) throws IOException {
       super(columnId, inspector, writer, nullable);
-      PositionedOutputStream out = writer.createStream(id,
+      OutStream out = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.isDirectV2 = isNewWriteFormat(writer);
       this.writer = createIntegerWriter(out, true, isDirectV2, writer);
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java	(revision )
@@ -138,7 +138,7 @@
   private int fixedRunLength = 0;
   private int variableRunLength = 0;
   private final long[] literals = new long[MAX_SCOPE];
-  private final PositionedOutputStream output;
+  private final OutStream output;
   private final boolean signed;
   private EncodingType encoding;
   private int numLiterals;
@@ -160,11 +160,11 @@
   private SerializationUtils utils;
   private boolean alignedBitpacking;
 
-  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed) {
+  RunLengthIntegerWriterV2(OutStream output, boolean signed) {
     this(output, signed, true);
   }
 
-  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed,
+  RunLengthIntegerWriterV2(OutStream output, boolean signed,
       boolean alignedBitpacking) {
     this.output = output;
     this.signed = signed;
@@ -816,6 +816,11 @@
         }
       }
     }
+  }
+
+  @Override
+  public void suppress() {
+    this.output.suppress();
   }
 
   private void initializeLiterals(long val) {
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerWriter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerWriter.java	(revision 30f7a84ab83e7e26d78a2b42159fcb16c72bfe90)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerWriter.java	(revision )
@@ -40,6 +40,11 @@
   void write(long value) throws IOException;
 
   /**
+   * Suppress the underlying stream.
+   */
+  void suppress();
+
+  /**
    * Flush the buffer
    * @throws IOException
    */
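For readers outside the ORC codebase, the thread running through these hunks: an OutStream can be marked suppressed, after which its buffered contents are dropped rather than written into the stripe, and the new IntegerWriter.suppress() simply delegates to the underlying stream so that WriterImpl can silence the DICTIONARY_DATA and LENGTH streams of an empty dictionary. Below is a minimal, self-contained Java sketch of that delegation pattern; SketchOutStream and SketchRleWriter are illustrative stand-ins, not the actual Hive classes.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

// Illustrative stand-in for ORC's OutStream: a buffered stream that can be
// marked "suppressed" so the stripe writer skips it entirely at flush time.
class SketchOutStream {
  private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  private boolean suppressed = false;

  void write(int b) { buffer.write(b); }

  // After suppress(), the buffered bytes are dropped rather than flushed.
  void suppress() { suppressed = true; }

  byte[] flush() {
    return suppressed ? new byte[0] : buffer.toByteArray();
  }
}

// Illustrative stand-in for RunLengthIntegerWriter: suppress() delegates to
// the underlying stream, mirroring the methods added by this patch.
class SketchRleWriter {
  private final SketchOutStream output;

  SketchRleWriter(SketchOutStream output) { this.output = output; }

  void write(long value) throws IOException {
    output.write((int) value); // real RLE encoding elided
  }

  void suppress() { output.suppress(); }
}

public class SuppressDemo {
  public static void main(String[] args) throws IOException {
    SketchOutStream lengthStream = new SketchOutStream();
    SketchRleWriter lengthWriter = new SketchRleWriter(lengthStream);

    // Dictionary turned out empty: suppress the stream instead of emitting
    // a zero-length LENGTH section into the stripe footer.
    int dictionarySize = 0;
    if (dictionarySize == 0) {
      lengthWriter.suppress();
    }
    System.out.println("flushed bytes: " + lengthStream.flush().length); // 0
  }
}

The read side mirrors this: a suppressed stream never appears in the stripe footer (the orc-file-has-null.out diff shows the zero-length LENGTH and DICTIONARY_DATA sections disappearing), so streams.get(name) in RecordReaderImpl returns null, and the new null guards skip dictionary and length decoding for that column.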