diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 4c86c6a..c3c9685 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -2529,15 +2529,22 @@ static boolean isDictionary(OrcProto.Stream.Kind kind, types.get(column).getKind(), stream.getKind(), isCompressed, hasNull[column]); long start = indexes[column].getEntry(group).getPositions(posn); + final long nextGroupOffset; + if (group < includedRowGroups.length - 1) { + nextGroupOffset = indexes[column].getEntry(group + 1).getPositions(posn); + } else { + nextGroupOffset = length; + } + // figure out the worst case last location - long end = (group == includedRowGroups.length - 1) ? - length : Math.min(length, - indexes[column].getEntry(group + 1) - .getPositions(posn) - + (isCompressed ? - (OutStream.HEADER_SIZE - + compressionSize) : - WORST_UNCOMPRESSED_SLOP)); + + // if adjacent groups have the same compressed block offset then stretch the slop + // by factor of 2 to safely accommodate the next compression block. + // One for the current compression block and another for the next compression block. + final long slop = isCompressed ? 2 * (OutStream.HEADER_SIZE + compressionSize) + : WORST_UNCOMPRESSED_SLOP; + long end = (group == includedRowGroups.length - 1) ? length : Math.min(length, + nextGroupOffset + slop); result.add(new DiskRange(offset + start, offset + end)); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index 1762073..083fd3e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -539,8 +539,8 @@ public void testPartialPlanCompressed() throws Exception { result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes, columns, rowGroups, true, encodings, types, 32768); assertThat(result, is(diskRanges(0, 1000, 100, 1000, - 400, 1000, 1000, 11000+32771, - 11000, 21000+32771, 41000, 51000+32771))); + 400, 1000, 1000, 11000+(2*32771), + 11000, 21000+(2*32771), 41000, 100000))); rowGroups = new boolean[]{false, false, false, false, false, true}; result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,