diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 1e16f09bc7..6cb578b507 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -37,9 +37,13 @@
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.AcidInputFormat;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.BucketCodec;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -57,32 +61,42 @@
  * directly read from the base files/insert_only deltas in vectorized row batches. The deleted
  * rows can then be easily indicated via the 'selected' field of the vectorized row batch.
  * Refer HIVE-14233 for more details.
+ *
+ * todo: annotate the plan to indicate which reader is used?
  */
 public class VectorizedOrcAcidRowBatchReader
     implements org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> {

   private static final Logger LOG = LoggerFactory.getLogger(VectorizedOrcAcidRowBatchReader.class);

-  public org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> baseReader;
-  protected VectorizedRowBatchCtx rbCtx;
-  protected VectorizedRowBatch vectorizedRowBatchBase;
+  private org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> baseReader;
+  private final VectorizedRowBatchCtx rbCtx;
+  private VectorizedRowBatch vectorizedRowBatchBase;
   private long offset;
   private long length;
   protected float progress = 0.0f;
   protected Object[] partitionValues;
-  protected boolean addPartitionCols = true;
-  private ValidTxnList validTxnList;
-  protected DeleteEventRegistry deleteEventRegistry;
-  protected StructColumnVector recordIdColumnVector;
-  private org.apache.orc.Reader.Options readerOptions;
-
-  public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf,
+  private boolean addPartitionCols = true;
+  private final ValidTxnList validTxnList;
+  private final DeleteEventRegistry deleteEventRegistry;
+  private final StructColumnVector recordIdColumnVector;
+  private final Reader.Options readerOptions;
+  private final boolean isOriginal;
+  private final boolean needRowIds;
+  //partition root
+  private final Path rootPath;
+  private final long rowIdOffset;
+  private final int bucketProperty;
+  private RecordReader innerReader;
+
+  VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf,
       Reporter reporter) throws IOException {
-    this.init(inputSplit, conf, reporter, Utilities.getVectorizedRowBatchCtx(conf));
+    this(inputSplit, conf, reporter, Utilities.getVectorizedRowBatchCtx(conf));
     final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, (OrcSplit) inputSplit);
     // Careful with the range here now, we do not want to read the whole base file like deltas.
-    final RecordReader innerReader = reader.rowsOptions(readerOptions.range(offset, length));
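+    // Keep the inner ORC RecordReader in a field (rather than a local) so that getRowNumber() can
+    // be used later when synthesizing ROW__IDs for rows read from "original" files.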
+    innerReader = reader.rowsOptions(readerOptions.range(offset, length));
     baseReader = new org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>() {

       @Override
@@ -121,12 +135,13 @@ public float getProgress() throws IOException {
   public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, Reporter reporter,
       org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> baseReader,
       VectorizedRowBatchCtx rbCtx) throws IOException {
-    this.init(inputSplit, conf, reporter, rbCtx);
+    this(inputSplit, conf, reporter, rbCtx);
     this.baseReader = baseReader;
+    this.innerReader = null;
     this.vectorizedRowBatchBase = baseReader.createValue();
   }

-  private void init(InputSplit inputSplit, JobConf conf, Reporter reporter,
+  private VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, Reporter reporter,
       VectorizedRowBatchCtx rowBatchCtx) throws IOException {
     this.rbCtx = rowBatchCtx;
     final boolean isAcidRead = HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
@@ -143,8 +158,7 @@ private void init(InputSplit inputSplit, JobConf conf, Reporter reporter,
     final OrcSplit orcSplit = (OrcSplit) inputSplit;
     reporter.setStatus(orcSplit.toString());
-    readerOptions = OrcInputFormat.createOptionsForReader(conf);
-    readerOptions = OrcRawRecordMerger.createEventOptions(readerOptions);
+    readerOptions = OrcRawRecordMerger.createEventOptions(OrcInputFormat.createOptionsForReader(conf));
     this.offset = orcSplit.getStart();
     this.length = orcSplit.getLength();
@@ -167,25 +181,85 @@ private void init(InputSplit inputSplit, JobConf conf, Reporter reporter,
     deleteEventReaderOptions.range(0, Long.MAX_VALUE);
     // Disable SARGs for deleteEventReaders, as SARGs have no meaning.
     deleteEventReaderOptions.searchArgument(null, null);
+    DeleteEventRegistry der = null;
     try {
       // See if we can load all the delete events from all the delete deltas in memory...
-      this.deleteEventRegistry = new ColumnizedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions);
+      der = new ColumnizedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions);
     } catch (DeleteEventsOverflowMemoryException e) {
       // If not, then create a set of hanging readers that do sort-merge to find the next smallest
       // delete event on-demand. Caps the memory consumption to (some_const * no. of readers).
-      this.deleteEventRegistry = new SortMergedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions);
+      der = new SortMergedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions);
     }
-
-    recordIdColumnVector = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, null, null, null);
+    this.deleteEventRegistry = der;
+    isOriginal = orcSplit.isOriginal();
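+    // For "original" files the ORC data has no ACID metadata columns, so allocate LongColumnVectors
+    // that next() will fill with a synthesized transactionId/bucketProperty/rowId.  For ACID files
+    // the struct fields are simply linked to the base batch's metadata columns in next().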
+    if(isOriginal) {
+      recordIdColumnVector = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+          new LongColumnVector(), new LongColumnVector(), new LongColumnVector());
+    }
+    else {
+      recordIdColumnVector = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, null, null, null);
+    }
+    needRowIds = needRowIds(rbCtx);
+    rootPath = orcSplit.getRootDir();
+    OffsetAndBucketProperty r = computeOffsetAndBucket(orcSplit, conf, validTxnList);//todo: refactor
+    rowIdOffset = r.offset;
+    bucketProperty = r.bucketProperty;
   }
+  private static final class OffsetAndBucketProperty {
+    private final long offset;
+    private final int bucketProperty;
+    private OffsetAndBucketProperty(long offset, int bucketProperty) {
+      this.offset = offset;
+      this.bucketProperty = bucketProperty;
+    }
+  }
+  /**
+   * Computes the ROW__ID offset and bucket property for a split of an "original" file whose rows
+   * need to be decorated with ROW__ID.
+   *
+   * We could call getAcidState() here (or anywhere in this class) to figure out the offset, but
+   * doing that per split could kill the NN.  On the other hand, OrcRawRecordMerger does the same
+   * thing today - which is not a good justification.  Where else can this be computed?  Computing
+   * the Directory once and storing it in OrcSplit would be more efficient; all that really needs
+   * to be stored is the "offset" for each split.  This is worth doing to make sure there is no
+   * perf regression.
+   */
+  private OffsetAndBucketProperty computeOffsetAndBucket(
+      OrcSplit split, JobConf conf, ValidTxnList validTxnList) throws IOException {
+    if(!(split.isOriginal() && needRowIds)) {
+      return new OffsetAndBucketProperty(0,0);
+    }
+    long rowIdOffset = 0;
+    int bucketId = AcidUtils.parseBaseOrDeltaBucketFilename(split.getPath(), conf).getBucketId();
+    int bucketProperty = BucketCodec.V1.encode(
+        new AcidOutputFormat.Options(conf).statementId(0).bucket(bucketId));
+    AcidUtils.Directory directoryState = AcidUtils.getAcidState(split.getRootDir(), conf, validTxnList);
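+    // Add up the row counts of all files in this bucket that come before the split's own file;
+    // rows read from this split are assigned rowIds starting at that offset.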
+    for (HadoopShims.HdfsFileStatusWithId f : directoryState.getOriginalFiles()) {
+      AcidOutputFormat.Options bucketOptions =
+          AcidUtils.parseBaseOrDeltaBucketFilename(f.getFileStatus().getPath(), conf);
+      if (bucketOptions.getBucketId() != bucketId) {
+        continue;
+      }
+      if (f.getFileStatus().getPath().equals(split.getPath())) {
+        //'f' is the file this split comes from
+        break;
+      }
+      Reader reader = OrcFile.createReader(f.getFileStatus().getPath(),
+          OrcFile.readerOptions(conf));
+      rowIdOffset += reader.getNumberOfRows();
+    }
+    return new OffsetAndBucketProperty(rowIdOffset, bucketProperty);
+  }
   /**
    * Returns whether it is possible to create a valid instance of this class for a given split.
    * @param conf is the job configuration
    * @param inputSplit
    * @return true if it is possible, else false.
    */
-  public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, InputSplit inputSplit) {
+  static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, InputSplit inputSplit)
+      throws IOException {
     if (!(inputSplit instanceof OrcSplit)) {
       return false; // must be an instance of OrcSplit.
     }
@@ -193,20 +267,51 @@ public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf,
     // To simplify the vectorization logic, the vectorized acid row batch reader does not handle
     // original files for now as they have a different schema than a regular ACID file.
     final OrcSplit split = (OrcSplit) inputSplit;
-    if (AcidUtils.getAcidOperationalProperties(conf).isSplitUpdate() && !split.isOriginal()) {
+    if(!AcidUtils.getAcidOperationalProperties(conf).isSplitUpdate()) {
+      //todo: throw? this should always be true in Hive 3.0
       // When split-update is turned on for ACID, a more optimized vectorized batch reader
       // can be created. But still only possible when we are *NOT* reading any originals.
-      return true;
+      return false;
     }
-    return false; // no split-update or possibly reading originals!
+    if(split.isOriginal()) {
+      Path[] deleteEvents = getDeleteDeltaDirsFromSplit(split);
+      if(deleteEvents.length > 0) {
+        //if we have delete events then we need ROW__IDs to apply them; bail for now
+        return false;
+      }
+      VectorizedRowBatchCtx rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
+      // if(rbCtx == null || needRowIds(rbCtx)) {
+      if(rbCtx == null) {
+        //means this can't be vectorized (e.g. INPUT__FILE__NAME is projected)
+        //or has to be decorated with ROW__IDs
+        return false;
+      }
+    }
+    return true;
   }
+  private static boolean needRowIds(VectorizedRowBatchCtx rbCtx) {
+    if(rbCtx.getVirtualColumnCount() == 0) {
+      return false;
+    }
+    for(VirtualColumn vc : rbCtx.getNeededVirtualColumns()) {
+      if(vc == VirtualColumn.ROWID) {
+        //The query needs ROW__ID: maybe explicitly asked for, maybe it's a Delete statement.
+        //Either way, we need to decorate "original" rows with row__id
+        return true;
+      }
+    }
+    return false;
+  }
+  /**
+   * ToDo: refactor/merge with {@link OrcInputFormat#getReader(InputSplit, AcidInputFormat.Options)}
+   */
   private static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException {
     Path path = orcSplit.getPath();
     Path root;
     if (orcSplit.hasBase()) {
       if (orcSplit.isOriginal()) {
-        root = path.getParent();
+        root = orcSplit.getRootDir();
       } else {
         root = path.getParent().getParent();
       }
@@ -258,11 +363,14 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException
       selectedBitSet.set(0, vectorizedRowBatchBase.size, true);
     }
-    // Case 1- find rows which belong to transactions that are not valid.
-    findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet);
+    //all "original" data belongs to txnid:0 and is always valid/committed for every reader
+    if(!isOriginal) {
+      // Case 1- find rows which belong to transactions that are not valid.
+      findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet);
+    }

     // Case 2- find rows which have been deleted.
-    this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase.cols,
+    this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase.cols, //todo: you can't find deleted until you have added row_ids
         vectorizedRowBatchBase.size, selectedBitSet);

     if (selectedBitSet.cardinality() == vectorizedRowBatchBase.size) {
@@ -283,6 +391,45 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException
       }
     }
+    if(isOriginal) {
+      /* This means the table was converted to an acid table when it already had data.  The
+       * storage layer therefore has no ROW__ID info in it and we have to synthesize it on
+       * the fly.  Major compaction will add ROW__IDs permanently. */
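+      // For "original" files the base batch already holds the user columns directly (there is no
+      // OrcRecordUpdater.ROW struct wrapper), so they can be copied into the outgoing batch as-is.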
+      System.arraycopy(vectorizedRowBatchBase.cols, 0, value.cols, 0, value.getDataColumnCount());
+      if(needRowIds) {
+        if(innerReader == null) {
+          throw new IllegalStateException(getClass().getName() + " requires "
+              + org.apache.orc.RecordReader.class
+              + " to handle original files that require ROW__IDs: " + rootPath);
+        }
+        //RecordIdentifier.transactionId
+        recordIdColumnVector.fields[0].noNulls = true;
+        recordIdColumnVector.fields[0].isRepeating = true;
+        //all "original" data is considered written by txnid:0, which committed
+        ((LongColumnVector)recordIdColumnVector.fields[0]).vector[0] = 0;
+
+        /**
+         * RecordIdentifier.bucketId (actually bucketProperty: see {@link BucketCodec})
+         */
+        recordIdColumnVector.fields[1].noNulls = true;
+        recordIdColumnVector.fields[1].isRepeating = true;
+        ((LongColumnVector)recordIdColumnVector.fields[1]).vector[0] = bucketProperty;
+
+        //RecordIdentifier.rowId
+        recordIdColumnVector.fields[2].noNulls = true;
+        recordIdColumnVector.fields[2].isRepeating = false;
+        long[] rowIdVector = ((LongColumnVector)recordIdColumnVector.fields[2]).vector;
+        for(int i = 0; i < vectorizedRowBatchBase.size; i++) {
+          //innerReader.getRowNumber() seems to point at the start of the batch
+          rowIdVector[i] = this.rowIdOffset + innerReader.getRowNumber() + i;
+        }
+
+        rbCtx.setRecordIdColumnVector(recordIdColumnVector);
+      }
+      progress = baseReader.getProgress();
+      return true;
+    }
+
     // Finally, link up the columnVector from the base VectorizedRowBatch to outgoing batch.
     // NOTE: We only link up the user columns and not the ACID metadata columns because this
     // vectorized code path is not being used in cases of update/delete, when the metadata columns
@@ -292,7 +439,8 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException
     StructColumnVector payloadStruct = (StructColumnVector) vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW];
     // Transfer columnVector objects from base batch to outgoing batch.
     System.arraycopy(payloadStruct.fields, 0, value.cols, 0, value.getDataColumnCount());
-    if (rbCtx != null) {
+    if (rbCtx != null) { //why would this be null?
+      //why do this if the query is not asking for ROW__ID?
       recordIdColumnVector.fields[0] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION];
       recordIdColumnVector.fields[1] = vectorizedRowBatchBase.cols[OrcRecordUpdater.BUCKET];
       recordIdColumnVector.fields[2] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW_ID];
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
index 885ef83381..90403e1b8a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
@@ -40,7 +40,12 @@
  * support tables and partitions stored in the ACID format. It works by using
  * the non-vectorized ACID reader and moving the data into a vectorized row
  * batch.
+ *
+ * Is there a reason to still have this now that we have VectorizedOrcAcidRowBatchReader?
+ * Once VectorizedOrcAcidRowBatchReader handles isOriginal, there is really no reason
+ * to have this, right?
 */
+@Deprecated
 public class VectorizedOrcAcidRowReader
     implements org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> {
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index 39d6b2b414..ced0325e95 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.ql;

-import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
@@ -37,7 +36,6 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.txn.AcidHouseKeeperService;
-import org.junit.After;
 import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -56,9 +54,9 @@
  * test AC=true, and AC=false with commit/rollback/exception and test resulting data.
  *
  * Can also test, calling commit in AC=true mode, etc, toggling AC...
- *
- * Tests here are for multi-statement transactions (WIP) and those that don't need to
- * run with Acid 2.0 (see subclasses of TestTxnCommands2)
+ *
+ * Tests here are for multi-statement transactions (WIP) and others.
+ * Mostly uses bucketed tables.
 */
 public class TestTxnCommands extends TxnCommandsBaseForTests {
   static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands.class);
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index c827dc4a0e..89c6e5e735 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.junit.Assert;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
@@ -522,5 +523,101 @@ public void testCtasBucketed() throws Exception {
     // Assert.assertEquals("Wrong msg", ErrorMsg.CTAS_PARCOL_COEXISTENCE.getErrorCode(), cpr.getErrorCode());
     Assert.assertTrue(cpr.getErrorMessage().contains("CREATE-TABLE-AS-SELECT does not support"));
   }
+  @Test
+  public void testVectorizedWithDelete() throws Exception {
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
+    hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
+    //this enables vectorization of ROW__ID
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);//HIVE-12631
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='true')");
+    runStatementOnDriver("insert into T(a,b) values(1,2),(3,4)");
+    runStatementOnDriver("delete from T where b = 4");
+    List<String> rs = runStatementOnDriver("select a, b from T");
+    Assert.assertEquals(1, rs.size());
+  }
+  @Ignore("Gopal's use case")
+  @Test
+  public void testNonAcidCtasToAcid() throws Exception {
+    runStatementOnDriver("drop table if exists customer");
+    // runStatementOnDriver("create table customer stored as orc as select * from tpcds_bin_partitioned_orc_1000.customer");
+    int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}};
+    runStatementOnDriver("insert into " + TxnCommandsBaseForTests.Table.ACIDTBL + makeValuesClause(values));
+    runStatementOnDriver("create table customer stored as orc as select * from " + Table.ACIDTBL);
+    runStatementOnDriver("alter table customer SET TBLPROPERTIES('transactional'='true')");
+    runStatementOnDriver("alter table customer compact 'major'");
+    TestTxnCommands2.runWorker(hiveConf);
+    List<String> rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from customer order by a, b, INPUT__FILE__NAME");
+    String[][] expected = {
+    };
+    checkExpected(rs, expected, "");
+  }
+  /**
+   * Maybe there is no issue surfacing this if we only have 1 vectorized reader for acid...
+   * need to handle the case with delete events for this.
+   */
+  @Test
+  public void testNonAcidToAcidVectorzied() throws Exception {
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
+    hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
+    //this enables vectorization of ROW__ID
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);//HIVE-12631
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("create table T(a int, b int) stored as orc");
+    int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}};
+    runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values));
+    //, 'transactional_properties'='default'
+    runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional'='true')");
+    //this uses VectorizedOrcAcidRowBatchReader
+    List<String> rs = runStatementOnDriver("select a from T where b > 6 order by a");
+    String[][] expected = {
+      {"6",""},
+      {"9",""},
+    };
+    checkExpected(rs, expected, "After conversion");
+    Assert.assertEquals(Integer.toString(6), rs.get(0));
+    Assert.assertEquals(Integer.toString(9), rs.get(1));
+
+    //PPD is working here, but the storage layer only filters at row-group level, not row level
+    //this uses VectorizedOrcAcidRowBatchReader
+    rs = runStatementOnDriver("select ROW__ID, a from T where b > 6 order by a");
+    String[][] expected1 = {
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}","6"},
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}","9"}
+    };
+    checkExpected(rs, expected1, "After conversion with VC1");
+
+    //this uses VectorizedOrcAcidRowBatchReader
+    rs = runStatementOnDriver("select ROW__ID, a from T where b > 0 order by a");
+    String[][] expected2 = {
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}","1"},
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}","2"},
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}","5"},
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}","6"},
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}","9"}
+    };
+    checkExpected(rs, expected2, "After conversion with VC2");
+
+    //doesn't vectorize (uses neither of the Vectorized Acid readers)
+    rs = runStatementOnDriver("select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a");
+    Assert.assertEquals("", 2, rs.size());
+    String[][] expected3 = {
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0"},
+      {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0"}
+    };
+    checkExpected(rs, expected3, "After non-vectorized read");
+    Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
+
+    runStatementOnDriver("update T set b = 17 where a = 1");
+    //this should use VectorizedOrcAcidRowReader
+    rs = runStatementOnDriver("select ROW__ID, b from T where b > 0 order by a");
+    String[][] expected4 = {
+      {"{\"transactionid\":21,\"bucketid\":536870912,\"rowid\":0}","17"},
{"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}","4"}, + {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}","6"}, + {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}","8"}, + {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}","10"} + }; + checkExpected(rs, expected4, "After conversion with VC4"); + } }