diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 8a6a056..e335071 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -115,6 +115,7 @@ import org.apache.hadoop.util.Progressable;
 import org.apache.orc.OrcConf;
 import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
 import org.apache.orc.TypeDescription;
 import org.junit.Before;
 import org.junit.Rule;
@@ -3951,4 +3952,102 @@ public void testColumnProjectionWithAcid() throws Exception {
     assertEquals(1000, record);
     reader.close();
   }
+
+  @Test
+  public void testAcidReadPastLastStripeOffset() throws Exception {
+    Path baseDir = new Path(workDir, "base_00100");
+    testFilePath = new Path(baseDir, "bucket_00000");
+    fs.mkdirs(baseDir);
+    fs.delete(testFilePath, true);
+    TypeDescription fileSchema =
+        TypeDescription.fromString("struct<operation:int," +
+            "originalTransaction:bigint,bucket:int,rowId:bigint," +
+            "currentTransaction:bigint," +
+            "row:struct<a:int,b:struct<c:int>,d:string>>");
+
+    // Create ORC file with small stripe size so we can write multiple stripes.
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(fileSchema)
+            .compress(org.apache.orc.CompressionKind.NONE)
+            .callback(new OrcRecordUpdater.KeyIndexBuilder("test"))
+            .stripeSize(128));
+    VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
+    batch.size = 1000;
+    StructColumnVector scv = (StructColumnVector)batch.cols[5];
+    // operation
+    batch.cols[0].isRepeating = true;
+    ((LongColumnVector) batch.cols[0]).vector[0] = 0;
+    // original transaction
+    batch.cols[1].isRepeating = true;
+    ((LongColumnVector) batch.cols[1]).vector[0] = 1;
+    // bucket
+    batch.cols[2].isRepeating = true;
+    ((LongColumnVector) batch.cols[2]).vector[0] = 0;
+    // current transaction
+    batch.cols[4].isRepeating = true;
+    ((LongColumnVector) batch.cols[4]).vector[0] = 1;
+
+    LongColumnVector lcv = (LongColumnVector)
+        ((StructColumnVector) scv.fields[1]).fields[0];
+    for(int r=0; r < 1000; r++) {
+      // row id
+      ((LongColumnVector) batch.cols[3]).vector[r] = r;
+      // a
+      ((LongColumnVector) scv.fields[0]).vector[r] = r * 42;
+      // b.c
+      lcv.vector[r] = r * 10001;
+      // d
+      ((BytesColumnVector) scv.fields[2]).setVal(r,
+          Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
+    }
+
+    // Minimum 5000 rows per stripe.
+    for (int idx = 0; idx < 8; ++idx) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    long fileLength = fs.getFileStatus(testFilePath).getLen();
+
+    // Find the last stripe.
+    Reader orcReader = OrcFile.createReader(fs, testFilePath);
+    List<StripeInformation> stripes = orcReader.getStripes();
+    StripeInformation lastStripe = stripes.get(stripes.size() - 1);
+    long lastStripeOffset = lastStripe.getOffset();
+    long lastStripeLength = lastStripe.getLength();
+
+    // Test with the same schema, with an include list.
+    conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
+    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
+    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string");
+    conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
+    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
+
+    LOG.info("Last stripe " + stripes.size() +
+        ", offset " + lastStripeOffset + ", length " + lastStripeLength);
+    // Specify an OrcSplit that starts beyond the offset of the last stripe.
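+    // A reader built from this split only includes stripes whose start offset falls
+    // inside [split.start, split.start + split.length); starting one byte past the
+    // last stripe's offset therefore matches no stripe in the file.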
+    OrcSplit split = new OrcSplit(testFilePath, null, lastStripeOffset + 1, lastStripeLength,
+        new String[0], null, false, true,
+        new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength, workDir);
+    OrcInputFormat inputFormat = new OrcInputFormat();
+    AcidInputFormat.RowReader<OrcStruct> reader = inputFormat.getReader(split,
+        new AcidInputFormat.Options(conf));
+
+    int record = 0;
+    RecordIdentifier id = reader.createKey();
+    OrcStruct struct = reader.createValue();
+    // Iterate through any records.
+    // Because our read offset was past the last stripe's offset, the rows from the
+    // last stripe will not be read. Thus 0 records.
+    while (reader.next(id, struct)) {
+      record += 1;
+    }
+    assertEquals(0, record);
+
+    reader.close();
+  }
 }
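
The zero-record assertion relies on ORC's stripe selection rule: a record reader
includes a stripe only when the stripe's start offset lies inside
[split.start, split.start + split.length). Below is a minimal standalone sketch of
that check, assuming only the org.apache.orc core reader API; the class name
StripeCoverageSketch and its command-line arguments are illustrative, not part of
the patch.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.StripeInformation;

public class StripeCoverageSketch {
  public static void main(String[] args) throws Exception {
    // args: <orc file> <split start> <split length>
    Path file = new Path(args[0]);
    long start = Long.parseLong(args[1]);
    long length = Long.parseLong(args[2]);
    Reader reader = OrcFile.createReader(file,
        OrcFile.readerOptions(new Configuration()));
    List<StripeInformation> stripes = reader.getStripes();
    for (StripeInformation stripe : stripes) {
      // Same rule the test exercises: a stripe is read only when its start
      // offset falls inside the split's byte range.
      boolean covered = stripe.getOffset() >= start
          && stripe.getOffset() < start + length;
      System.out.println("stripe @" + stripe.getOffset()
          + " (length " + stripe.getLength() + "): covered=" + covered);
    }
  }
}

Against the file the test writes, with start = lastStripeOffset + 1, every stripe
prints covered=false, which is why the reader returns zero records.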