diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 685b388d84..2b55941437 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -186,6 +186,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   join_nullsafe.q,\
   leftsemijoin.q,\
   limit_pushdown.q,\
+  llap_acid.q,\
+  llap_acid_fast.q,\
   load_dyn_part1.q,\
   load_dyn_part2.q,\
   load_dyn_part3.q,\
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 79ec4edd50..1cf5f49bca 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -138,7 +138,7 @@ InputSplit split, JobConf job, Reporter reporter) throws IOException {
     boolean useLlapIo = true;
     if (split instanceof LlapAwareSplit) {
-      useLlapIo = ((LlapAwareSplit) split).canUseLlapIo();
+      useLlapIo = ((LlapAwareSplit) split).canUseLlapIo(job);
     }
     if (useLlapIo) return null;
@@ -170,9 +170,14 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept
     RowSchema rowSchema = findTsOp(mapWork).getSchema();
     final List<String> colNames = new ArrayList<>(rowSchema.getSignature().size());
     final List<TypeInfo> colTypes = new ArrayList<>(rowSchema.getSignature().size());
+    boolean hasRowId = false;
     for (ColumnInfo c : rowSchema.getSignature()) {
       String columnName = c.getInternalName();
-      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) continue;
+      if (VirtualColumn.ROWID.getName().equals(columnName)) {
+        hasRowId = true;
+      } else {
+        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) continue;
+      }
       colNames.add(columnName);
       colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
     }
@@ -190,10 +195,15 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept
         }
       }
     }
-    // UNDONE: Virtual column support?
+    final VirtualColumn[] virtualColumns;
+    if (hasRowId) {
+      virtualColumns = new VirtualColumn[] {VirtualColumn.ROWID};
+    } else {
+      virtualColumns = new VirtualColumn[0];
+    }
     return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
         colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount,
-        new VirtualColumn[0], new String[0]);
+        virtualColumns, new String[0]);
   }
 
   static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
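A minimal sketch of what the new canUseLlapIo(Configuration) signature allows, for orientation only: a split can now base the last-minute LLAP IO decision on job settings such as the ACID scan flag, not just on its own state. ExampleAcidAwareSplit is a hypothetical name; LlapAwareSplit and HIVE_TRANSACTIONAL_TABLE_SCAN come from this patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.LlapAwareSplit;

class ExampleAcidAwareSplit implements LlapAwareSplit {
  @Override
  public boolean canUseLlapIo(Configuration conf) {
    // Decline LLAP IO for transactional scans this split cannot serve.
    return !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
  }
}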
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index 5d93884e92..ad2d8e7ba2 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -164,7 +164,7 @@ private LlapRecordReader(MapWork mapWork, JobConf job, FileSplit split,
     // Create the consumer of encoded data; it will coordinate decoding to CVBs.
     feedback = rp = cvp.createReadPipeline(this, split, columnIds, sarg, columnNames,
         counters, schema, sourceInputFormat, sourceSerDe, reporter, job,
-        mapWork.getPathToPartitionInfo());
+        mapWork.getPathToPartitionInfo(), rbCtx);
   }
 
   private static MapWork findMapWork(JobConf job) throws HiveException {
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
index d08dfbbe98..48db8141b8 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters;
 import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -43,5 +44,6 @@ ReadPipeline createReadPipeline(Consumer<ColumnVectorBatch> consumer, FileSplit
     List<Integer> columnIds, SearchArgument sarg, String[] columnNames,
     QueryFragmentCounters counters, TypeDescription readerSchema,
     InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe, Reporter reporter,
-    JobConf job, Map<Path, PartitionDesc> parts) throws IOException;
+    JobConf job, Map<Path, PartitionDesc> parts, VectorizedRowBatchCtx rowBatchCtx)
+    throws IOException;
 }
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
index 945aff31b3..6ee814b217 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
@@ -83,8 +83,8 @@ public GenericColumnVectorProducer(SerDeLowLevelCacheImpl serdeCache,
   public ReadPipeline createReadPipeline(Consumer<ColumnVectorBatch> consumer, FileSplit split,
       List<Integer> columnIds, SearchArgument sarg, String[] columnNames,
       QueryFragmentCounters counters, TypeDescription schema, InputFormat<?, ?> sourceInputFormat,
-      Deserializer sourceSerDe, Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts)
-      throws IOException {
+      Deserializer sourceSerDe, Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts,
+      VectorizedRowBatchCtx rowBatchCtx) throws IOException {
     cacheMetrics.incrCacheReadRequests();
     OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
         consumer, columnIds.size(), false, counters, ioMetrics);
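The OrcColumnVectorProducer changes below are built on chained Consumer<ColumnVectorBatch> stages: each stage transforms a batch and forwards it downstream. A minimal sketch of that pattern, using the real Hive Consumer and ColumnVectorBatch types; ForwardingConsumer is a hypothetical name, not part of the patch.

import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch;
import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;

abstract class ForwardingConsumer implements Consumer<ColumnVectorBatch> {
  protected final Consumer<ColumnVectorBatch> downstream;

  ForwardingConsumer(Consumer<ColumnVectorBatch> downstream) {
    this.downstream = downstream;
  }

  // Subclasses override consumeData(ColumnVectorBatch) to transform the batch
  // (merge delete deltas, project the ROW struct, ...) before forwarding it.
  @Override
  public void setDone() {
    downstream.setDone();
  }

  @Override
  public void setError(Throwable t) {
    downstream.setError(t);
  }
}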
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
index 6edd84b8b0..ac694da1dc 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
@@ -24,9 +24,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.Pool;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.llap.cache.BufferUsageManager;
 import org.apache.hadoop.hive.llap.cache.LowLevelCache;
 import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters;
@@ -36,21 +34,34 @@
 import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache;
 import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics;
 import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
+import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
+import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;
 import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hive.common.util.FixedSizedObjectPool;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.OrcConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class OrcColumnVectorProducer implements ColumnVectorProducer {
+  private static final Logger LOG = LoggerFactory.getLogger(OrcColumnVectorProducer.class);
 
   private final OrcMetadataCache metadataCache;
   private final LowLevelCache lowLevelCache;
@@ -84,14 +95,230 @@
   public ReadPipeline createReadPipeline(
       Consumer<ColumnVectorBatch> consumer, FileSplit split, List<Integer> columnIds,
       SearchArgument sarg, String[] columnNames, QueryFragmentCounters counters,
       TypeDescription readerSchema, InputFormat<?, ?> unused0, Deserializer unused1,
-      Reporter reporter, JobConf job, Map<Path, PartitionDesc> unused2) throws IOException {
+      Reporter reporter, JobConf job, Map<Path, PartitionDesc> unused2,
+      VectorizedRowBatchCtx rowBatchCtx) throws IOException {
     cacheMetrics.incrCacheReadRequests();
-    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, columnIds.size(),
-        _skipCorrupt, counters, ioMetrics);
-    OrcEncodedDataReader reader = new OrcEncodedDataReader(
-        lowLevelCache, bufferManager, metadataCache, conf, job, split, columnIds, sarg,
-        columnNames, edc, counters, readerSchema, tracePool);
+
+    final boolean isAcidRead = HiveConf.getBoolVar(job,
+        HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
+
+    final boolean isOriginal;
+    final boolean hasBase;
+    final boolean hasDelta;
+
+    if (split instanceof OrcSplit) {
+      final OrcSplit orcSplit = (OrcSplit) split;
+      isOriginal = orcSplit.isOriginal();
+      hasBase = orcSplit.hasBase();
+      hasDelta = orcSplit.getDeltas() != null && !orcSplit.getDeltas().isEmpty();
+    } else {
+      isOriginal = true;
+      hasBase = false;
+      hasDelta = false;
+    }
+
+    final AcidUtils.AcidOperationalProperties acidOperationalProperties
+        = AcidUtils.getAcidOperationalProperties(job);
+    final boolean isSplitUpdate = acidOperationalProperties.isSplitUpdate();
+    OrcEncodedDataConsumer edc = null;
+    OrcEncodedDataReader reader = null;
+
+    if (isOriginal) {
+      if (!isAcidRead && !hasDelta) {
+        LOG.debug("Original scan only");
+        edc = new OrcEncodedDataConsumer(consumer, columnIds.size(), _skipCorrupt, counters,
+            ioMetrics);
+        reader = new OrcEncodedDataReader(lowLevelCache, bufferManager, metadataCache, conf, job,
+            split, columnIds, sarg, columnNames, edc, counters, readerSchema, tracePool);
+      }
+    } else {
+      if (hasBase) {
+        if (hasDelta) {
+          if (isSplitUpdate) {
+            LOG.debug("Base with delete deltas");
+            final Consumer<ColumnVectorBatch> rowProjectionConsumer =
+                new AcidToNonAcidConsumer(consumer, columnIds, rowBatchCtx);
+            final Consumer<ColumnVectorBatch> acidConsumer = new AcidConsumer(
+                rowProjectionConsumer, split, job, reporter, rowBatchCtx);
+            edc = new OrcEncodedDataConsumer(acidConsumer, OrcRecordUpdater.FIELDS, _skipCorrupt,
+                counters, ioMetrics);
+            reader = new OrcEncodedDataReader(lowLevelCache, bufferManager, metadataCache, conf,
+                job, split, columnIds, sarg, columnNames, edc, counters, readerSchema, tracePool);
+          }
+        } else {
+          LOG.debug("Base only");
+          consumer = new AcidToNonAcidConsumer(consumer, columnIds, rowBatchCtx);
+          edc = new OrcEncodedDataConsumer(consumer, OrcRecordUpdater.FIELDS, _skipCorrupt,
+              counters, ioMetrics);
+          reader = new OrcEncodedDataReader(lowLevelCache, bufferManager, metadataCache, conf, job,
+              split, columnIds, sarg, columnNames, edc, counters, readerSchema, tracePool);
+        }
+      }
+    }
     edc.init(reader, reader, reader.getTrace());
     return edc;
   }
+
+  private static void vrbToCvb(VectorizedRowBatch vrb, ColumnVectorBatch cvb) {
+    // Convert VectorizedRowBatch to ColumnVectorBatch
+    for (int i = 0; i < vrb.projectionSize; i++) {
+      cvb.cols[i] = vrb.cols[vrb.projectedColumns[i]];
+    }
+
+    // Compact the selected records to the front of each (shared) column vector.
+    cvb.size = vrb.size;
+    if (vrb.selectedInUse) {
+      for (int i = 0; i < vrb.size; i++) {
+        for (ColumnVector columnVector : cvb.cols) {
+          // The output index advances per row, not per column.
+          columnVector.setElement(i, vrb.selected[i], columnVector);
+        }
+      }
+    }
+  }
+
+  private static void cvbToVrb(ColumnVectorBatch cvb, VectorizedRowBatch vrb) {
+    vrb.size = cvb.size;
+    LOG.debug("cvb.cols.length = " + cvb.cols.length);
+    LOG.debug("vrb.cols.length = " + vrb.cols.length);
+    System.arraycopy(cvb.cols, 0, vrb.cols, 0, cvb.cols.length);
+    vrb.projectionSize = OrcRecordUpdater.FIELDS;
+    for (int i = 0; i < OrcRecordUpdater.FIELDS; i++) {
+      vrb.projectedColumns[i] = i;
+    }
+  }
+
+  /**
+   * CVB -> VRB -> VectorizedOrcAcidRowBatchReader -> VRB -> CVB
+   *
+   * An ACID consumer gets a CVB and converts it into a VRB. A vectorized ORC ACID row batch
+   * reader takes that VRB, merges in the ACID events, and returns a merged VRB. The ACID
+   * consumer then converts the merged VRB back into a CVB and passes it to the next consumer.
+   * The goal is to reuse the vectorized ORC ACID row batch reader untouched as much as possible.
+   */
+  static class AcidConsumer implements Consumer<ColumnVectorBatch> {
+    final Consumer<ColumnVectorBatch> consumer;
+    final RecordReader<NullWritable, VectorizedRowBatch> acidReader;
+    final VectorizedRowBatch processedVrb;
+    final ColumnVectorBatch outputCvb;
+    ColumnVectorBatch arrivedCvb;
+
+    AcidConsumer(Consumer<ColumnVectorBatch> consumer, InputSplit split,
+        JobConf job, Reporter reporter, VectorizedRowBatchCtx rowBatchCtx) throws IOException {
+      this.consumer = consumer;
+      this.processedVrb = rowBatchCtx.createVectorizedRowBatch();
+      this.outputCvb = new ColumnVectorBatch(processedVrb.numCols);
+
+      final RecordReader<NullWritable, VectorizedRowBatch> baseReader =
+          new RecordReader<NullWritable, VectorizedRowBatch>() {
+        @Override
+        public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
+          cvbToVrb(arrivedCvb, value);
+          return true;
+        }
+
+        @Override
+        public NullWritable createKey() {
+          return NullWritable.get();
+        }
+
+        @Override
+        public VectorizedRowBatch createValue() {
+          return new VectorizedRowBatch(OrcRecordUpdater.FIELDS);
+        }
+
+        @Override
+        public void close() throws IOException {
+          setDone();
+        }
+
+        @Override
+        public long getPos() throws IOException {
+          return 0;
+        }
+
+        @Override
+        public float getProgress() throws IOException {
+          return 0;
+        }};
+      this.acidReader = new VectorizedOrcAcidRowBatchReader(split, job, reporter, baseReader, rowBatchCtx);
+    }
+
+    @Override
+    public void setDone() {
+      consumer.setDone();
+    }
+
+    @Override
+    public void setError(Throwable t) {
+      consumer.setError(t);
+    }
+
+    @Override
+    public void consumeData(ColumnVectorBatch dataCvb) {
+      try {
+        this.arrivedCvb = dataCvb;
+        acidReader.next(null, processedVrb);
+        // vrbToCvb compacts the shared payload vectors in place after the merge.
+        vrbToCvb(processedVrb, outputCvb);
+        // Propagate the post-merge row count, then pass the batch to the next consumer.
+        dataCvb.size = processedVrb.size;
+        consumer.consumeData(dataCvb);
+      } catch (IOException e) {
+        consumer.setError(e);
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  /**
+   * An ACID to non-ACID consumer passes only the selected columns on to the next consumer and
+   * publishes the ACID columns through the vectorized row batch context.
+   */
+  private static class AcidToNonAcidConsumer implements Consumer<ColumnVectorBatch> {
+    private final Consumer<ColumnVectorBatch> consumer;
+    private final ColumnVectorBatch output;
+    private final List<Integer> columnIds;
+    private final VectorizedRowBatchCtx rowBatchCtx;
+    private final StructColumnVector structColumnVector;
+
+    AcidToNonAcidConsumer(Consumer<ColumnVectorBatch> consumer, List<Integer> columnIds,
+        VectorizedRowBatchCtx rowBatchCtx) {
+      this.consumer = consumer;
+      this.output = new ColumnVectorBatch(columnIds.size());
+      this.columnIds = columnIds;
+      this.rowBatchCtx = rowBatchCtx;
+      this.structColumnVector =
+          new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, null, null, null);
+    }
+
+    @Override
+    public void consumeData(ColumnVectorBatch data) {
+      // Copy only selected columns
+      output.size = data.size;
+      final StructColumnVector rowColumnVector =
+          (StructColumnVector) data.cols[OrcRecordUpdater.ROW];
+      final int size = columnIds.size();
+      for (int i = 0; i < size; i++) {
+        output.cols[i] = rowColumnVector.fields[columnIds.get(i)];
+      }
+
+      // Copy ACID columns
+      structColumnVector.fields[0] = data.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION];
+      structColumnVector.fields[1] = data.cols[OrcRecordUpdater.BUCKET];
+      structColumnVector.fields[2] = data.cols[OrcRecordUpdater.ROW_ID];
+      rowBatchCtx.setRecordIdColumnVector(structColumnVector);
+
+      // Pass the result to the next consumer
+      consumer.consumeData(output);
+    }
+
+    @Override
+    public void setDone() {
+      consumer.setDone();
+    }
+
+    @Override
+    public void setError(Throwable t) {
+      consumer.setError(t);
+    }
+  }
 }
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index a2eb82947f..19da0de83a 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -26,6 +26,7 @@ import java.util.List;
 
 import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
+import org.apache.hadoop.hive.ql.io.AcidInputFormat;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.impl.DataReaderProperties;
 import org.apache.orc.impl.OrcIndex;
@@ -209,7 +210,18 @@ public OrcEncodedDataReader(LowLevelCache lowLevelCache, BufferUsageManager buff
     if (readerSchema == null) {
       readerSchema = fileMetadata.getSchema();
     }
-    globalIncludes = OrcInputFormat.genIncludedColumns(readerSchema, includedColumnIds);
+    final boolean[] readerIncludes = OrcInputFormat.genIncludedColumns(readerSchema, includedColumnIds);
+    if (split instanceof OrcSplit) {
+      final OrcSplit orcSplit = (OrcSplit) split;
+      final List<AcidInputFormat.DeltaMetaData> deltas = orcSplit.getDeltas();
+      if (orcSplit.isOriginal() && (deltas == null || deltas.isEmpty())) {
+        globalIncludes = readerIncludes;
+      } else {
+        globalIncludes = OrcInputFormat.shiftReaderIncludedForAcid(readerIncludes);
+      }
+    } else {
+      globalIncludes = readerIncludes;
+    }
     // Do not allow users to override zero-copy setting. The rest can be taken from user config.
     boolean useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
     if (useZeroCopy != OrcConf.USE_ZEROCOPY.getBoolean(jobConf)) {
@@ -217,7 +229,7 @@ public OrcEncodedDataReader(LowLevelCache lowLevelCache, BufferUsageManager buff
       jobConf.setBoolean(OrcConf.USE_ZEROCOPY.getAttribute(), useZeroCopy);
     }
     this.jobConf = jobConf;
-    Reader.Options options = new Reader.Options(jobConf).include(globalIncludes);
+    Reader.Options options = new Reader.Options(jobConf).include(readerIncludes);
     evolution = new SchemaEvolution(fileMetadata.getSchema(), readerSchema, options);
     consumer.setFileMetadata(fileMetadata);
     consumer.setIncludedColumns(globalIncludes);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index f4499d7ff1..a5bdbef56c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -76,6 +76,7 @@
     INTERVAL_YEAR_MONTH (0x100),
     INTERVAL_DAY_TIME (0x200),
     BINARY (0x400),
+    STRUCT (0x800),
     DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
     INTERVAL_FAMILY (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
     INT_INTERVAL_YEAR_MONTH (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value),
@@ -122,6 +123,8 @@ public static ArgumentType fromHiveTypeName(String hiveTypeName) {
       return INTERVAL_YEAR_MONTH;
     } else if (lower.equals(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME)) {
       return INTERVAL_DAY_TIME;
+    } else if (VectorizationContext.structTypePattern.matcher(lower).matches()) {
+      return STRUCT;
     } else if (lower.equals("void")) {
       // The old code let void through...
       return INT_FAMILY;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index 23fdaa554f..fba17a8d7c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -188,7 +188,7 @@ private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int lo
         colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex], batchIndex);
   }
 
-  Object extractRowColumn(
+  public Object extractRowColumn(
       ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
 
     if (colVector == null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
index b37dd05ce7..8e70538530 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
@@ -66,7 +66,7 @@ public void process(Object row, int tag) throws HiveException {
       batch.size = Math.min(batch.size, offset + limit - currCount);
       if (batch.selectedInUse == false) {
         batch.selectedInUse = true;
-        batch.selected = new int[batch.size];
+        batch.selected = new int[batch.selected.length];
         for (int i = 0; i < batch.size - skipSize; i++) {
           batch.selected[i] = skipSize + i;
         }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index 1ac8914516..fb71ef9338 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -811,12 +811,11 @@ public void process(Writable value) throws HiveException {
         VectorizedRowBatch batch = (VectorizedRowBatch) value;
         numRows += batch.size;
         if (hasRowIdentifier) {
-
-          // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader
-          // UNDONE: Or, perhaps tell it to do it before calling us, ...
-          // UNDONE: For now, set column to NULL.
-
-          setRowIdentiferToNull(batch);
+          if (batchContext.getRecordIdColumnVector() == null) {
+            setRowIdentiferToNull(batch);
+          } else {
+            batch.cols[rowIdentifierColumnNum] = batchContext.getRecordIdColumnVector();
+          }
         }
       }
       oneRootOperator.process(value, 0);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index fcebb6fbce..47d93331f7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -347,6 +347,9 @@ public void addProjectionColumn(String columnName, int vectorBatchColIndex) {
   public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*",
       Pattern.CASE_INSENSITIVE);
 
+  public static final Pattern structTypePattern = Pattern.compile("struct.*",
+      Pattern.CASE_INSENSITIVE);
+
   //Map column number to type
   private OutputColumnManager ocm;
@@ -2715,6 +2718,8 @@ static String getUndecoratedName(String hiveTypeName) throws HiveException {
     case INTERVAL_YEAR_MONTH:
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
+    case STRUCT:
+      return "Struct";
     default:
       throw new HiveException("Unexpected hive type name " + hiveTypeName);
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 90d1372aec..65267240b8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -81,6 +81,7 @@
   private int partitionColumnCount;
   private int virtualColumnCount;
   private VirtualColumn[] neededVirtualColumns;
+  private StructColumnVector recordIdColumnVector;
 
   private String[] scratchColumnTypeNames;
@@ -136,6 +137,14 @@ public int getVirtualColumnCount() {
     return scratchColumnTypeNames;
   }
 
+  public StructColumnVector getRecordIdColumnVector() {
+    return this.recordIdColumnVector;
+  }
+
+  public void setRecordIdColumnVector(StructColumnVector recordIdColumnVector) {
+    this.recordIdColumnVector = recordIdColumnVector;
+  }
+
   /**
    * Initializes the VectorizedRowBatch context based on scratch column type names and an
    * object inspector.
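The new recordIdColumnVector accessors are the hand-off point for ROW__ID: a reader publishes a struct of (originalTransaction, bucket, rowId) vectors on the batch context, and VectorMapOperator (above) either plugs it into the batch or NULLs the ROW__ID column when nothing was published. A minimal sketch of the producer side, mirroring what VectorizedOrcAcidRowReader does later in this patch; RowIdPublisher is a hypothetical name, the Hive classes and accessors are real.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

class RowIdPublisher {
  private final LongColumnVector txn = new LongColumnVector();
  private final LongColumnVector bucket = new LongColumnVector();
  private final LongColumnVector rowId = new LongColumnVector();
  private final StructColumnVector recordId =
      new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, txn, bucket, rowId);

  // Record one (originalTransaction, bucket, rowId) triple per row in the batch.
  void add(int batchIndex, RecordIdentifier key) {
    txn.vector[batchIndex] = key.getTransactionId();
    bucket.vector[batchIndex] = key.getBucketProperty();
    rowId.vector[batchIndex] = key.getRowId();
  }

  // Publish the struct; VectorMapOperator copies it into the ROW__ID column.
  void publish(VectorizedRowBatchCtx rbCtx) {
    rbCtx.setRecordIdColumnVector(recordId);
  }
}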
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index 1fb70f87e2..d8df5cc9be 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -61,6 +61,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hive.common.util.DateUtils;
@@ -1462,12 +1464,18 @@ public Object setValue(Object row, ColumnVector column, int columnRow)
   private static VectorExpressionWriter genVectorExpressionWritableStruct(
       SettableStructObjectInspector fieldObjInspector) throws HiveException {
 
-    return new VectorExpressionWriterMap() {
+    return new VectorExpressionWriterStruct() {
       private Object obj;
+      private VectorExtractRow vectorExtractRow;
+      private StructTypeInfo structTypeInfo;
 
-      public VectorExpressionWriter init(SettableStructObjectInspector objInspector) throws HiveException {
+      public VectorExpressionWriter init(SettableStructObjectInspector objInspector)
+          throws HiveException {
         super.init(objInspector);
         obj = initValue(null);
+        vectorExtractRow = new VectorExtractRow();
+        structTypeInfo = (StructTypeInfo)
+            TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName());
         return this;
       }
@@ -1477,15 +1485,43 @@ public Object initValue(Object ignored) {
       }
 
       @Override
-      public Object writeValue(ColumnVector column, int row)
-          throws HiveException {
-        throw new HiveException("Not implemented yet");
+      public Object writeValue(ColumnVector column, int row) throws HiveException {
+        final StructColumnVector structColVector = (StructColumnVector) column;
+        final SettableStructObjectInspector structOI =
+            (SettableStructObjectInspector) this.objectInspector;
+        final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
+        final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+
+        final int fieldSize = fields.size();
+        for (int i = 0; i < fieldSize; i++) {
+          final StructField structField = fields.get(i);
+          final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
+              fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
+          structOI.setStructFieldData(obj, structField, value);
+        }
+        return this.obj;
       }
 
      @Override
-      public Object setValue(Object row, ColumnVector column, int columnRow)
-          throws HiveException {
-        throw new HiveException("Not implemented yet");
+      public Object setValue(Object field, ColumnVector column, int row) throws HiveException {
+        if (null == field) {
+          field = initValue(null);
+        }
+
+        final StructColumnVector structColVector = (StructColumnVector) column;
+        final SettableStructObjectInspector structOI =
+            (SettableStructObjectInspector) this.objectInspector;
+        final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
+        final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+
+        final int fieldSize = fields.size();
+        for (int i = 0; i < fieldSize; i++) {
+          final StructField structField = fields.get(i);
+          final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
+              fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
+          // Populate the caller-supplied object, not the writer's internal one.
+          structOI.setStructFieldData(field, structField, value);
+        }
+        return field;
       }
     }.init(fieldObjInspector);
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
index ead4678f64..20e134541c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
@@ -17,11 +17,13 @@
  */
 package org.apache.hadoop.hive.ql.io;
 
+import org.apache.hadoop.conf.Configuration;
+
 /**
  * Split that is aware that it could be executed in LLAP. Allows LlapInputFormat to do
  * a last-minute check to see if the LLAP IO pipeline should be used for this particular split.
 * By default, there is no such check - whatever is sent in is attempted with LLAP IO.
 */
 public interface LlapAwareSplit {
-  boolean canUseLlapIo();
+  boolean canUseLlapIo(Configuration conf);
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index de49fc84bb..8740e7df8f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1580,20 +1580,19 @@ private long computeProjectionSize(List<OrcProto.Type> fileTypes,
       }
       return ReaderImpl.getRawDataSizeFromColIndices(internalColIds, fileTypes, stats);
     }
+  }
 
-  private boolean[] shiftReaderIncludedForAcid(boolean[] included) {
-    // We always need the base row
-    included[0] = true;
-    boolean[] newIncluded = new boolean[included.length + OrcRecordUpdater.FIELDS];
-    Arrays.fill(newIncluded, 0, OrcRecordUpdater.FIELDS, true);
-    for(int i= 0; i < included.length; ++i) {
-      newIncluded[i + OrcRecordUpdater.FIELDS] = included[i];
-    }
-    return newIncluded;
+  public static boolean[] shiftReaderIncludedForAcid(boolean[] included) {
+    // We always need the base row
+    included[0] = true;
+    boolean[] newIncluded = new boolean[included.length + OrcRecordUpdater.FIELDS];
+    Arrays.fill(newIncluded, 0, OrcRecordUpdater.FIELDS, true);
+    for (int i = 0; i < included.length; ++i) {
+      newIncluded[i + OrcRecordUpdater.FIELDS] = included[i];
     }
+    return newIncluded;
   }
 
-
   /** Class intended to update two values from methods... Java-related cruft.
*/ @VisibleForTesting static final class CombinedCtx { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java index c30e8fe75a..0780a53219 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java @@ -71,13 +71,13 @@ final static int UPDATE_OPERATION = 1; final static int DELETE_OPERATION = 2; - final static int OPERATION = 0; - final static int ORIGINAL_TRANSACTION = 1; - final static int BUCKET = 2; - final static int ROW_ID = 3; - final static int CURRENT_TRANSACTION = 4; - final static int ROW = 5; - final static int FIELDS = 6; + public final static int OPERATION = 0; + public final static int ORIGINAL_TRANSACTION = 1; + public final static int BUCKET = 2; + public final static int ROW_ID = 3; + public final static int CURRENT_TRANSACTION = 4; + public final static int ROW = 5; + public final static int FIELDS = 6; final static int DELTA_BUFFER_SIZE = 16 * 1024; final static long DELTA_STRIPE_SIZE = 16 * 1024 * 1024; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index d61b24bef3..985bcf1676 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -26,8 +26,11 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.AcidInputFormat; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.ColumnarSplit; import org.apache.hadoop.hive.ql.io.LlapAwareSplit; import org.apache.hadoop.hive.ql.io.SyntheticFileId; @@ -217,8 +220,33 @@ public long getColumnarProjectionSize() { } @Override - public boolean canUseLlapIo() { - return isOriginal && (deltas == null || deltas.isEmpty()); + public boolean canUseLlapIo(Configuration conf) { + final boolean hasDelta = deltas != null && !deltas.isEmpty(); + final boolean isAcidRead = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); + final AcidUtils.AcidOperationalProperties acidOperationalProperties + = AcidUtils.getAcidOperationalProperties(conf); + final boolean isSplitUpdate = acidOperationalProperties.isSplitUpdate(); + + if (isOriginal) { + if (!isAcidRead && !hasDelta) { + // Original scan only + return true; + } + } else { + if (hasBase) { + if (hasDelta) { + if (isSplitUpdate) { + // Base with delete deltas + return true; + } + } else { + // Base scan only + return true; + } + } + } + + return false; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 8f807102dd..36c254d624 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -61,7 +61,7 @@ private static final Logger LOG = LoggerFactory.getLogger(VectorizedOrcAcidRowBatchReader.class); - private org.apache.hadoop.hive.ql.io.orc.RecordReader baseReader; + public org.apache.hadoop.mapred.RecordReader baseReader; private VectorizedRowBatchCtx rbCtx; private VectorizedRowBatch vectorizedRowBatchBase; private long offset; @@ -71,10 +71,62 @@ private boolean addPartitionCols = true; private 
 ValidTxnList validTxnList;
   private DeleteEventRegistry deleteEventRegistry;
+  private StructColumnVector recordIdColumnVector;
+  private org.apache.orc.Reader.Options readerOptions;
 
   public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf,
       Reporter reporter) throws IOException {
+    this.init(inputSplit, conf, reporter, Utilities.getVectorizedRowBatchCtx(conf));
+    final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, (OrcSplit) inputSplit);
+    // Careful with the range here now, we do not want to read the whole base file like deltas.
+    final RecordReader innerReader = reader.rowsOptions(readerOptions.range(offset, length));
+    baseReader = new org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>() {
+
+      @Override
+      public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
+        return innerReader.nextBatch(value);
+      }
+
+      @Override
+      public NullWritable createKey() {
+        return NullWritable.get();
+      }
+
+      @Override
+      public VectorizedRowBatch createValue() {
+        return rbCtx.createVectorizedRowBatch();
+      }
+
+      @Override
+      public long getPos() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public void close() throws IOException {
+        innerReader.close();
+      }
+
+      @Override
+      public float getProgress() throws IOException {
+        return innerReader.getProgress();
+      }
+    };
+    this.vectorizedRowBatchBase = ((RecordReaderImpl) innerReader).createRowBatch();
+  }
+
+  public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, Reporter reporter,
+      org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> baseReader,
+      VectorizedRowBatchCtx rbCtx) throws IOException {
+    this.init(inputSplit, conf, reporter, rbCtx);
+    this.baseReader = baseReader;
+    this.vectorizedRowBatchBase = baseReader.createValue();
+  }
+
+  private void init(InputSplit inputSplit, JobConf conf, Reporter reporter,
+      VectorizedRowBatchCtx rowBatchCtx) throws IOException {
+    this.rbCtx = rowBatchCtx;
     final boolean isAcidRead = HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
     final AcidUtils.AcidOperationalProperties acidOperationalProperties
         = AcidUtils.getAcidOperationalProperties(conf);
@@ -88,28 +140,13 @@ public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf,
     }
 
     final OrcSplit orcSplit = (OrcSplit) inputSplit;
-    rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
-
-    reporter.setStatus(orcSplit.toString());
-
-    Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, orcSplit);
-    Reader.Options readerOptions = OrcInputFormat.createOptionsForReader(conf);
+    readerOptions = OrcInputFormat.createOptionsForReader(conf);
     readerOptions = OrcRawRecordMerger.createEventOptions(readerOptions);
     this.offset = orcSplit.getStart();
     this.length = orcSplit.getLength();
-    // Careful with the range here now, we do not want to read the whole base file like deltas.
-    this.baseReader = reader.rowsOptions(readerOptions.range(offset, length));
-
-    // VectorizedRowBatchBase schema is picked up from the baseReader because the SchemaEvolution
-    // stuff happens at the ORC layer that understands how to map user schema to acid schema.
-    if (this.baseReader instanceof RecordReaderImpl) {
-      this.vectorizedRowBatchBase = ((RecordReaderImpl) this.baseReader).createRowBatch();
-    } else {
-      throw new IOException("Failed to create vectorized row batch for the reader of type "
-          + this.baseReader.getClass().getName());
-    }
-
     int partitionColumnCount = (rbCtx != null) ?
rbCtx.getPartitionColumnCount() : 0; if (partitionColumnCount > 0) { partitionValues = new Object[partitionColumnCount]; @@ -136,6 +173,8 @@ public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, // delete event on-demand. Caps the memory consumption to (some_const * no. of readers). this.deleteEventRegistry = new SortMergedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions); } + + recordIdColumnVector = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, null, null, null); } /** @@ -189,7 +228,7 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti } addPartitionCols = false; } - if (!baseReader.nextBatch(vectorizedRowBatchBase)) { + if (!baseReader.next(null, vectorizedRowBatchBase)) { return false; } } catch (Exception e) { @@ -250,6 +289,12 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti StructColumnVector payloadStruct = (StructColumnVector) vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW]; // Transfer columnVector objects from base batch to outgoing batch. System.arraycopy(payloadStruct.fields, 0, value.cols, 0, value.getDataColumnCount()); + if (rbCtx != null) { + recordIdColumnVector.fields[0] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]; + recordIdColumnVector.fields[1] = vectorizedRowBatchBase.cols[OrcRecordUpdater.BUCKET]; + recordIdColumnVector.fields[2] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW_ID]; + rbCtx.setRecordIdColumnVector(recordIdColumnVector); + } progress = baseReader.getProgress(); return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java index a2725b20e9..885ef83381 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.io.orc; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; @@ -39,7 +41,7 @@ * the non-vectorized ACID reader and moving the data into a vectorized row * batch. 
 */
-class VectorizedOrcAcidRowReader
+public class VectorizedOrcAcidRowReader
     implements org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> {
   private final AcidInputFormat.RowReader<OrcStruct> innerReader;
@@ -49,11 +51,14 @@
   private Object[] partitionValues;
   private final ObjectInspector objectInspector;
   private final DataOutputBuffer buffer = new DataOutputBuffer();
+  private final StructColumnVector recordIdColumnVector;
+  private final LongColumnVector transactionColumnVector;
+  private final LongColumnVector bucketColumnVector;
+  private final LongColumnVector rowIdColumnVector;
 
-  VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
-      Configuration conf,
-      VectorizedRowBatchCtx vectorizedRowBatchCtx,
-      FileSplit split) throws IOException {
+  public VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
+      Configuration conf, VectorizedRowBatchCtx vectorizedRowBatchCtx, FileSplit split)
+      throws IOException {
     this.innerReader = inner;
     this.key = inner.createKey();
     rbCtx = vectorizedRowBatchCtx;
@@ -64,6 +69,12 @@
     }
     this.value = inner.createValue();
     this.objectInspector = inner.getObjectInspector();
+    this.transactionColumnVector = new LongColumnVector();
+    this.bucketColumnVector = new LongColumnVector();
+    this.rowIdColumnVector = new LongColumnVector();
+    this.recordIdColumnVector =
+        new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+            transactionColumnVector, bucketColumnVector, rowIdColumnVector);
   }
 
@@ -81,19 +92,30 @@ public boolean next(NullWritable nullWritable,
     try {
       VectorizedBatchUtil.acidAddRowToBatch(value, (StructObjectInspector) objectInspector,
-          vectorizedRowBatch.size++, vectorizedRowBatch, rbCtx, buffer);
+          vectorizedRowBatch.size, vectorizedRowBatch, rbCtx, buffer);
+      addRecordId(vectorizedRowBatch.size, key);
+      vectorizedRowBatch.size++;
       while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length &&
           innerReader.next(key, value)) {
         VectorizedBatchUtil.acidAddRowToBatch(value, (StructObjectInspector) objectInspector,
-            vectorizedRowBatch.size++, vectorizedRowBatch, rbCtx, buffer);
+            vectorizedRowBatch.size, vectorizedRowBatch, rbCtx, buffer);
+        addRecordId(vectorizedRowBatch.size, key);
+        vectorizedRowBatch.size++;
       }
+      rbCtx.setRecordIdColumnVector(recordIdColumnVector);
     } catch (Exception e) {
       throw new IOException("error iterating", e);
     }
     return true;
   }
 
+  private void addRecordId(int index, RecordIdentifier key) {
+    transactionColumnVector.vector[index] = key.getTransactionId();
+    bucketColumnVector.vector[index] = key.getBucketProperty();
+    rowIdColumnVector.vector[index] = key.getRowId();
+  }
+
   @Override
   public NullWritable createKey() {
     return NullWritable.get();
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
index c21327f517..c8a0415920 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
@@ -2149,7 +2149,11 @@ private static TreeReader createEncodedTreeReader(TypeDescription schema,
     } else if (batch.hasVectors(columnIndex)) {
       vectors = batch.getColumnVectors(columnIndex);
     } else {
-      throw new AssertionError("Batch has no data for " + columnIndex + ": " + batch);
+      // A struct column can have a null child column
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Batch has no data for " + columnIndex + ": " + batch);
+      }
+      return null;
     }
 
     // EncodedColumnBatch is already decompressed, we don't really need to
pass codec. @@ -2712,7 +2716,9 @@ public void seek(PositionProvider[] index) throws IOException { } if (fields != null) { for (TreeReader child : fields) { - child.seek(index); + if (child != null) { + child.seek(index); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 78c511b016..f91ff42cfc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -72,6 +72,7 @@ public void analyzeInternal(ASTNode tree) throws SemanticException { if (useSuper) { super.analyzeInternal(tree); } else { + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true); if (!SessionState.get().getTxnMgr().supportsAcid()) { throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TXNMGR.getMsg()); diff --git ql/src/test/queries/clientpositive/llap_acid_fast.q ql/src/test/queries/clientpositive/llap_acid_fast.q new file mode 100644 index 0000000000..376b19ced1 --- /dev/null +++ ql/src/test/queries/clientpositive/llap_acid_fast.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled=true; + +SET hive.llap.io.enabled=true; + +SET hive.exec.orc.default.buffer.size=32768; +SET hive.exec.orc.default.row.index.stride=1000; +SET hive.optimize.index.filter=true; +set hive.fetch.task.conversion=none; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +DROP TABLE orc_llap_acid_fast; + +CREATE TABLE orc_llap_acid_fast ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); + +insert into table orc_llap_acid_fast partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; +insert into table orc_llap_acid_fast partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; +insert into table orc_llap_acid_fast partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10; + +explain vectorization only detail +select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order +by csmallint, cint; +select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order +by csmallint, cint; + +insert into table orc_llap_acid_fast partition (csmallint = 1) values (1, 1, 1, 1); + +update orc_llap_acid_fast set cbigint = 2 where cint = 1; + +explain vectorization only detail +select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order +by csmallint, cint; +select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order +by csmallint, cint; + +DROP TABLE orc_llap_acid_fast; diff --git ql/src/test/results/clientpositive/llap/llap_acid.q.out ql/src/test/results/clientpositive/llap/llap_acid.q.out new file mode 100644 index 0000000000..99db632139 --- /dev/null +++ ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -0,0 +1,321 @@ +PREHOOK: query: DROP TABLE orc_llap +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_llap ( + cint INT, + cbigint 
BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap +POSTHOOK: query: CREATE TABLE orc_llap ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap +PREHOOK: query: insert into table orc_llap partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap@csmallint=1 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap@csmallint=2 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap@csmallint=2 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@orc_llap +PREHOOK: Output: default@orc_llap +POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: default@orc_llap +PREHOOK: query: insert into table orc_llap partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap@csmallint=3 +POSTHOOK: query: insert into table orc_llap partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap@csmallint=3 +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: explain +select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 20 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Input: default@orc_llap@csmallint=1 +PREHOOK: Input: default@orc_llap@csmallint=2 +PREHOOK: Input: default@orc_llap@csmallint=3 +#### A masked pattern was here #### +POSTHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order +by csmallint, cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Input: default@orc_llap@csmallint=1 +POSTHOOK: Input: default@orc_llap@csmallint=2 +POSTHOOK: Input: default@orc_llap@csmallint=3 +#### A masked pattern was here #### +-285355633 1 
-1241163445
+-109813638	1	-58941842
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: insert into table orc_llap partition (csmallint = 1) values (1, 1, 1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: query: insert into table orc_llap partition (csmallint = 1) values (1, 1, 1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: update orc_llap set cbigint = 2 where cint = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap
+PREHOOK: Input: default@orc_llap@csmallint=1
+PREHOOK: Input: default@orc_llap@csmallint=2
+PREHOOK: Input: default@orc_llap@csmallint=3
+PREHOOK: Output: default@orc_llap@csmallint=1
+PREHOOK: Output: default@orc_llap@csmallint=2
+PREHOOK: Output: default@orc_llap@csmallint=3
+POSTHOOK: query: update orc_llap set cbigint = 2 where cint = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Input: default@orc_llap@csmallint=1
+POSTHOOK: Input: default@orc_llap@csmallint=2
+POSTHOOK: Input: default@orc_llap@csmallint=3
+POSTHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: Output: default@orc_llap@csmallint=2
+POSTHOOK: Output: default@orc_llap@csmallint=3
+PREHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_llap
+                  filterExpr: cint is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL
+                  Filter Operator
+                    predicate: cint is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
+                    Select Operator
+                      expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col1 (type: smallint), _col0 (type: int)
+                        sort order: ++
+                        Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap
+PREHOOK: Input: default@orc_llap@csmallint=1
+PREHOOK: Input: default@orc_llap@csmallint=2
+PREHOOK: Input: default@orc_llap@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Input: default@orc_llap@csmallint=1
+POSTHOOK: Input: default@orc_llap@csmallint=2
+POSTHOOK: Input: default@orc_llap@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+1	1	2
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: DROP TABLE orc_llap
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_llap
+PREHOOK: Output: default@orc_llap
+POSTHOOK: query: DROP TABLE orc_llap
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Output: default@orc_llap
diff --git ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out
new file mode 100644
index 0000000000..2bc26966e4
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out
@@ -0,0 +1,362 @@
+PREHOOK: query: DROP TABLE orc_llap_acid_fast
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_llap_acid_fast
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orc_llap_acid_fast (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_llap_acid_fast
+POSTHOOK: query: CREATE TABLE orc_llap_acid_fast (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_llap_acid_fast
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan Vectorization:
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 4, 1]
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumns: [4, 0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [1]
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    includeColumns: [0, 1]
+                    dataColumns: cint:int, cbigint:bigint, cfloat:float, cdouble:double
+                    partitionColumnCount: 1
+                    partitionColumns: csmallint:smallint
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 0, 2]
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+
+  Stage: Stage-0
+    Fetch Operator
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1) values (1, 1, 1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1) values (1, 1, 1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: update orc_llap_acid_fast set cbigint = 2 where cint = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: query: update orc_llap_acid_fast set cbigint = 2 where cint = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+PREHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan Vectorization:
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 4, 1]
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumns: [4, 0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: [1]
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    includeColumns: [0, 1]
+                    dataColumns: cint:int, cbigint:bigint, cfloat:float, cdouble:double
+                    partitionColumnCount: 1
+                    partitionColumns: csmallint:smallint
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1, 0, 2]
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+
+  Stage: Stage-0
+    Fetch Operator
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+1	1	1
+1	1	2
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: DROP TABLE orc_llap_acid_fast
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Output: default@orc_llap_acid_fast
+POSTHOOK: query: DROP TABLE orc_llap_acid_fast
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Output: default@orc_llap_acid_fast
diff --git ql/src/test/results/clientpositive/llap_acid_fast.q.out ql/src/test/results/clientpositive/llap_acid_fast.q.out
new file mode 100644
index 0000000000..855f00504f
--- /dev/null
+++ ql/src/test/results/clientpositive/llap_acid_fast.q.out
@@ -0,0 +1,316 @@
+PREHOOK: query: DROP TABLE orc_llap_acid_fast
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_llap_acid_fast
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orc_llap_acid_fast (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_llap_acid_fast
+POSTHOOK: query: CREATE TABLE orc_llap_acid_fast (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_llap_acid_fast
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4]
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 4, 1]
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Execution mode: vectorized
+      LLAP IO: may be used (ACID table)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 4
+              includeColumns: [0, 1]
+              dataColumns: cint:int, cbigint:bigint, cfloat:float, cdouble:double
+              partitionColumnCount: 1
+              partitionColumns: csmallint:smallint
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+
+  Stage: Stage-0
+    Fetch Operator
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1) values (1, 1, 1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1) values (1, 1, 1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: update orc_llap_acid_fast set cbigint = 2 where cint = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: query: update orc_llap_acid_fast set cbigint = 2 where cint = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=3
+PREHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan Vectorization:
+              native: true
+              projectedOutputColumns: [0, 1, 2, 3, 4]
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 4, 1]
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Execution mode: vectorized
+      LLAP IO: may be used (ACID table)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 4
+              includeColumns: [0, 1]
+              dataColumns: cint:int, cbigint:bigint, cfloat:float, cdouble:double
+              partitionColumnCount: 1
+              partitionColumns: csmallint:smallint
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+
+  Stage: Stage-0
+    Fetch Operator
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+PREHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=1
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=2
+POSTHOOK: Input: default@orc_llap_acid_fast@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+1	1	1
+1	1	2
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: DROP TABLE orc_llap_acid_fast
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_llap_acid_fast
+PREHOOK: Output: default@orc_llap_acid_fast
+POSTHOOK: query: DROP TABLE orc_llap_acid_fast
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_llap_acid_fast
+POSTHOOK: Output: default@orc_llap_acid_fast