diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index d177e3f..889bd58 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -147,6 +147,12 @@ public Object next(Object previous) throws IOException {
   public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException {
     // If the user hasn't been reading by row, use the fast path.
     if (rowInBatch >= batch.size) {
+      if (batch.size > 0) {
+        // the local batch has been consumed entirely, reset it
+        batch.reset();
+      }
+      baseRow = super.getRowNumber();
+      rowInBatch = 0;
       return super.nextBatch(theirBatch);
     }
     copyIntoBatch(theirBatch, batch, rowInBatch);
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 2071d13..aa99e57 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -154,9 +154,13 @@ private void checkVectorizedReader() throws Exception {
     VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
     OrcStruct row = null;
+    long lastRowNumber = -1;
     // Check Vectorized ORC reader against ORC row reader
     while (vrr.nextBatch(batch)) {
+      Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
       for (int i = 0; i < batch.size; i++) {
+        Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber()+i);
+        lastRowNumber = rr.getRowNumber();
         row = (OrcStruct) rr.next(row);
         for (int j = 0; j < batch.cols.length; j++) {
           Object a = (row.getFieldValue(j));
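
For reviewers, a minimal consumer-side sketch (not part of the patch) of the contract the new assertions exercise: after nextBatch() returns, getRowNumber() on the Hive ql.io.orc RecordReader reports the absolute row number of the first row of the batch just produced, so row i of the batch is file row getRowNumber() + i. This is a sketch under assumptions, not code from the patch; the class name RowNumberSketch and the path "/tmp/example.orc" are placeholders, and the reader is opened the same way the existing tests do, via OrcFile.createReader.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

public class RowNumberSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder input path; any existing ORC file would do.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    while (rows.nextBatch(batch)) {
      // With this patch, getRowNumber() after nextBatch() is the absolute
      // row number of the first row in the batch just returned.
      long base = rows.getRowNumber();
      for (int i = 0; i < batch.size; i++) {
        long absoluteRow = base + i;  // file-level row number of row i
        // ... process row absoluteRow ...
      }
    }
    rows.close();
  }
}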