diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 0df82b9..c21272c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -2489,15 +2489,23 @@ static boolean isDictionary(OrcProto.Stream.Kind kind,
           types.get(column).getKind(), stream.getKind(), isCompressed,
           hasNull[column]);
       long start = indexes[column].getEntry(group).getPositions(posn);
+      long nextGroupOffset = 0;
+      if (group < includedRowGroups.length - 1) {
+        nextGroupOffset = indexes[column].getEntry(group + 1).getPositions(posn);
+      } else {
+        nextGroupOffset = length;
+      }
+
       // figure out the worst case last location
-      long end = (group == includedRowGroups.length - 1) ?
-          length : Math.min(length,
-          indexes[column].getEntry(group + 1)
-              .getPositions(posn)
-              + (isCompressed ?
-                  (OutStream.HEADER_SIZE
-                      + compressionSize) :
-                  WORST_UNCOMPRESSED_SLOP));
+
+      // If adjacent groups have the same compressed block offset, stretch the slop
+      // by a factor of 2 so the range safely covers both compression blocks:
+      // one for the current compression block and another for the next one.
+      int slopFactor = start == nextGroupOffset ? 2 : 1;
+      long slop = isCompressed ? slopFactor * (OutStream.HEADER_SIZE + compressionSize)
+          : WORST_UNCOMPRESSED_SLOP;
+      long end = (group == includedRowGroups.length - 1) ? length : Math.min(length,
+          nextGroupOffset + slop);
       result.add(new DiskRange(offset + start, offset + end));
     }
   }
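
For review convenience, the snippet below is a minimal, self-contained sketch of the new end-offset computation, lifted out of the surrounding reader state. The class name SlopSketch, the method worstCaseEnd, and the constant values are hypothetical stand-ins; the real OutStream.HEADER_SIZE and RecordReaderImpl.WORST_UNCOMPRESSED_SLOP are defined in the Hive/ORC sources and are not reproduced here. Only the branching mirrors the patch above.

    // A minimal sketch of the patched logic; names and constant values are
    // illustrative stand-ins, not copies of the Hive/ORC definitions.
    public class SlopSketch {
      // Assumed placeholders for OutStream.HEADER_SIZE and
      // RecordReaderImpl.WORST_UNCOMPRESSED_SLOP.
      static final int HEADER_SIZE = 3;
      static final int WORST_UNCOMPRESSED_SLOP = 4096;

      /** Worst-case end offset of the disk range planned for one row group. */
      static long worstCaseEnd(long start, long nextGroupOffset, boolean isLastGroup,
                               boolean isCompressed, long length, int compressionSize) {
        if (isLastGroup) {
          return length;  // last included group: plan the read to the end of the stream
        }
        // If the next group's index points at the same compressed-block offset as
        // this group's start, the two groups share a compression block, so the
        // range must stretch far enough to cover the shared block plus the next one.
        int slopFactor = (start == nextGroupOffset) ? 2 : 1;
        long slop = isCompressed
            ? (long) slopFactor * (HEADER_SIZE + compressionSize)
            : WORST_UNCOMPRESSED_SLOP;
        return Math.min(length, nextGroupOffset + slop);
      }

      public static void main(String[] args) {
        int compressionSize = 262144;  // assumed 256 KB compression chunk
        long length = 10_000_000L;
        // Offsets differ: one block of slop past the next group's start.
        System.out.println(worstCaseEnd(0L, 1_000_000L, false, true, length, compressionSize));
        // Offsets match: doubled slop covering the shared block and the one after it.
        System.out.println(worstCaseEnd(1_000_000L, 1_000_000L, false, true, length, compressionSize));
      }
    }

Keying the doubled slop on start == nextGroupOffset keeps the common case, where adjacent groups start in different compression blocks, from over-reading, while still guaranteeing the decoder can reach the next group's first values when two groups share one block.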