diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f360dfa..df45f2c 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2856,6 +2856,10 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "of aggregations that use complex types.\n",
         "For example, AVG uses a complex type (STRUCT) for partial aggregation results" +
         "The default value is true."),
+    HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false,
+        "This flag should be set to true to enable vectorization\n" +
+        "of ROW__ID.\n" +
+        "The default value is false."),
     HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control "
         + "whether to check, convert, and normalize partition value to conform to its column type in "
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index cffe245..f66e19b 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -356,6 +356,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_reduce2.q,\
   vector_reduce3.q,\
   vector_reduce_groupby_decimal.q,\
+  vector_row__id.q,\
   vector_string_concat.q,\
   vector_struct_in.q,\
   vector_udf_character_length.q,\
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 22ca025..79ec4ed 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -190,8 +190,10 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept
       }
     }
   }
+  // UNDONE: Virtual column support?
   return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
-      colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]);
+      colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount,
+      new VirtualColumn[0], new String[0]);
 }

 static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index ed50df2..1ac8914 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
 import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -125,6 +126,9 @@
   private transient int dataColumnCount;
   private transient int partitionColumnCount;
   private transient Object[] partitionValues;

+  private transient int virtualColumnCount;
+  private transient boolean hasRowIdentifier;
+  private transient int rowIdentifierColumnNum;
+
   private transient boolean[] dataColumnsToIncludeTruncated;
@@ -504,6 +508,19 @@ private void internalSetChildren(Configuration hconf) throws Exception {
     dataColumnCount = batchContext.getDataColumnCount();
     partitionColumnCount = batchContext.getPartitionColumnCount();
     partitionValues = new Object[partitionColumnCount];
+    virtualColumnCount = batchContext.getVirtualColumnCount();
+    rowIdentifierColumnNum = -1;
+    if (virtualColumnCount > 0) {
+      final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount;
+      VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns();
+      hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID);
+      if (hasRowIdentifier) {
+        rowIdentifierColumnNum = firstVirtualColumnNum;
+      }
+    } else {
+      hasRowIdentifier = false;
+    }
+
     dataColumnNums = batchContext.getDataColumnNums();
     Preconditions.checkState(dataColumnNums != null);
@@ -601,6 +618,13 @@ public void cleanUpInputFileChangedOp() throws HiveException {
         currentVectorPartContext.partName);
   }

+  private void setRowIdentifierToNull(VectorizedRowBatch batch) {
+    ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum];
+    rowIdentifierColVector.isNull[0] = true;
+    rowIdentifierColVector.noNulls = false;
+    rowIdentifierColVector.isRepeating = true;
+  }
+
   /*
    * Setup the context for reading from the next partition file.
    */
@@ -695,6 +719,12 @@ private void setupPartitionContextVars(String nominalPath) throws HiveException
       batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
     }

+    if (hasRowIdentifier) {
+
+      // No ACID in code path -- set ROW__ID to NULL.
+      setRowIdentifierToNull(deserializerBatch);
+    }
+
     /*
      * Set or clear the rest of the reading variables based on {vector|row} deserialization.
      */
@@ -778,7 +808,16 @@ public void process(Writable value) throws HiveException {
          */
         batchCounter++;
         if (value != null) {
-          numRows += ((VectorizedRowBatch) value).size;
+          VectorizedRowBatch batch = (VectorizedRowBatch) value;
+          numRows += batch.size;
+          if (hasRowIdentifier) {
+
+            // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader
+            // UNDONE: Or, perhaps tell it to do it before calling us, ...
+            // UNDONE: For now, set column to NULL.
+
+            setRowIdentifierToNull(batch);
+          }
         }
         oneRootOperator.process(value, 0);
         if (oneRootOperator.getDone()) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9e026f0..fcebb6f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -124,6 +124,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -221,6 +222,7 @@ public VectorizationContext(String contextName, List<String> initialColumnNames,
       projectedColumns.add(i);
       projectionColumnMap.put(projectionColumnNames.get(i), i);
     }
+
     int firstOutputColumnIndex = projectedColumns.size();
     this.ocm = new OutputColumnManager(firstOutputColumnIndex);
     this.firstOutputColumnIndex = firstOutputColumnIndex;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 3c12e04..90d1372 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -38,6 +38,7 @@
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.IOPrepareCache;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -78,6 +79,8 @@
  private int[] dataColumnNums;
  private int dataColumnCount;
  private int partitionColumnCount;
+  private int virtualColumnCount;
+  private VirtualColumn[] neededVirtualColumns;

  private String[] scratchColumnTypeNames;

@@ -88,14 +91,17 @@ public VectorizedRowBatchCtx() {
  }

  public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos,
-      int[] dataColumnNums, int partitionColumnCount, String[] scratchColumnTypeNames) {
+      int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] neededVirtualColumns,
+      String[] scratchColumnTypeNames) {
    this.rowColumnNames = rowColumnNames;
    this.rowColumnTypeInfos = rowColumnTypeInfos;
    this.dataColumnNums = dataColumnNums;
    this.partitionColumnCount = partitionColumnCount;
+    this.neededVirtualColumns = neededVirtualColumns;
+    this.virtualColumnCount = neededVirtualColumns.length;
    this.scratchColumnTypeNames = scratchColumnTypeNames;

-    dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount;
+    dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - virtualColumnCount;
  }

  public String[] getRowColumnNames() {
@@ -118,6 +124,14 @@ public int getPartitionColumnCount() {
    return partitionColumnCount;
  }

+  public int getVirtualColumnCount() {
+    return virtualColumnCount;
+  }
+
+  public VirtualColumn[] getNeededVirtualColumns() {
+    return neededVirtualColumns;
+  }
+
  public String[] getScratchColumnTypeNames() {
    return scratchColumnTypeNames;
  }
@@ -138,6 +152,8 @@ public void init(StructObjectInspector structObjectInspector, String[] scratchCo
    rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector);
    dataColumnNums = null;
    partitionColumnCount = 0;
+    virtualColumnCount = 0;
+    neededVirtualColumns = new VirtualColumn[0];
    dataColumnCount = rowColumnTypeInfos.length;

    // Scratch column information.
@@ -204,13 +220,14 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDes
   */
  public VectorizedRowBatch createVectorizedRowBatch() {
-    final int dataAndPartColumnCount = rowColumnTypeInfos.length;
-    final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length;
+    final int nonScratchColumnCount = rowColumnTypeInfos.length;
+    final int totalColumnCount =
+        nonScratchColumnCount + scratchColumnTypeNames.length;
    VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);

    if (dataColumnNums == null) {
      // All data and partition columns.
-      for (int i = 0; i < dataAndPartColumnCount; i++) {
+      for (int i = 0; i < nonScratchColumnCount; i++) {
        TypeInfo typeInfo = rowColumnTypeInfos[i];
        result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo);
      }
@@ -218,24 +235,30 @@ public VectorizedRowBatch createVectorizedRowBatch()
      // Create only needed/included columns data columns.
      for (int i = 0; i < dataColumnNums.length; i++) {
        int columnNum = dataColumnNums[i];
-        Preconditions.checkState(columnNum < dataAndPartColumnCount);
+        Preconditions.checkState(columnNum < nonScratchColumnCount);
        TypeInfo typeInfo = rowColumnTypeInfos[columnNum];
        result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
      }
-      // Always create partition columns.
-      final int endColumnNum = dataColumnCount + partitionColumnCount;
-      for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) {
+      // Always create partition and virtual columns.
+      final int partitionEndColumnNum = dataColumnCount + partitionColumnCount;
+      for (int partitionColumnNum = dataColumnCount; partitionColumnNum < partitionEndColumnNum; partitionColumnNum++) {
        TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum];
        result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
      }
+      final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount;
+      for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < virtualEndColumnNum; virtualColumnNum++) {
+        TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum];
+        result.cols[virtualColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
+      }
    }
    for (int i = 0; i < scratchColumnTypeNames.length; i++) {
      String typeName = scratchColumnTypeNames[i];
-      result.cols[rowColumnTypeInfos.length + i] =
+      result.cols[nonScratchColumnCount + i] =
          VectorizedBatchUtil.createColumnVector(typeName);
    }

+    // UNDONE: Also remember virtualColumnCount...
    result.setPartitionInfo(dataColumnCount, partitionColumnCount);

    result.reset();
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
index 2435bf1..0032305 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
@@ -20,10 +20,13 @@
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Map;

 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;

 import org.apache.hadoop.conf.Configuration;
@@ -64,6 +67,17 @@
      ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(),
          RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName());

+  public static final ImmutableMap<String, VirtualColumn> VIRTUAL_COLUMN_NAME_MAP =
+      new ImmutableMap.Builder<String, VirtualColumn>().putAll(getColumnNameMap()).build();
+
+  private static Map<String, VirtualColumn> getColumnNameMap() {
+    Map<String, VirtualColumn> map = new HashMap<String, VirtualColumn>();
+    for (VirtualColumn virtualColumn : values()) {
+      map.put(virtualColumn.name, virtualColumn);
+    }
+    return map;
+  }
+
  private final String name;
  private final TypeInfo typeInfo;
  private final boolean isHidden;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 8183194..0913f40 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -75,6 +75,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -229,6 +230,7 @@
 import org.apache.hive.common.util.AnnotationUtils;
 import org.apache.hadoop.util.ReflectionUtils;

+import com.google.common.collect.ImmutableSet;
 import com.google.common.base.Preconditions;

 public class Vectorizer implements PhysicalPlanResolver {
@@ -274,6 +276,10 @@
  private Set<String> supportedAggregationUdfs = new HashSet<String>();

+  // The set of virtual columns that vectorized readers *MAY* support.
+  public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns =
+      ImmutableSet.of(VirtualColumn.ROWID);
+
  private HiveConf hiveConf;

  private boolean useVectorizedInputFileFormat;
@@ -283,6 +289,7 @@
  private boolean isPtfVectorizationEnabled;
  private boolean isVectorizationComplexTypesEnabled;
  private boolean isVectorizationGroupByComplexTypesEnabled;
+  private boolean isVectorizedRowIdentifierEnabled;

  private boolean isSchemaEvolution;
@@ -316,6 +323,9 @@ private void clearNotVectorizedReason() {

  private long vectorizedVertexNum = -1;

+  private Set<VirtualColumn> availableVectorizedVirtualColumnSet = null;
+  private Set<VirtualColumn> neededVirtualColumnSet = null;
+
  public Vectorizer() {

    /*
@@ -453,6 +463,8 @@ public Vectorizer() {
    List<Integer> dataColumnNums;
    int partitionColumnCount;
+    List<VirtualColumn> availableVirtualColumnList;
+    List<VirtualColumn> neededVirtualColumnList;

    boolean useVectorizedInputFileFormat;

    boolean groupByVectorOutput;
@@ -488,6 +500,12 @@ public void setDataColumnNums(List<Integer> dataColumnNums) {
    public void setPartitionColumnCount(int partitionColumnCount) {
      this.partitionColumnCount = partitionColumnCount;
    }
+    public void setAvailableVirtualColumnList(List<VirtualColumn> availableVirtualColumnList) {
+      this.availableVirtualColumnList = availableVirtualColumnList;
+    }
+    public void setNeededVirtualColumnList(List<VirtualColumn> neededVirtualColumnList) {
+      this.neededVirtualColumnList = neededVirtualColumnList;
+    }
    public void setScratchTypeNameArray(String[] scratchTypeNameArray) {
      this.scratchTypeNameArray = scratchTypeNameArray;
    }
@@ -522,6 +540,16 @@ public void setReduceColumnNullOrder(String reduceColumnNullOrder) {

    public void transferToBaseWork(BaseWork baseWork) {

+      final int virtualColumnCount;
+      VirtualColumn[] neededVirtualColumns;
+      if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) {
+        virtualColumnCount = neededVirtualColumnList.size();
+        neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]);
+      } else {
+        virtualColumnCount = 0;
+        neededVirtualColumns = new VirtualColumn[0];
+      }
+
      String[] allColumnNameArray = allColumnNames.toArray(new String[0]);
      TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]);
      int[] dataColumnNumsArray;
@@ -537,6 +565,7 @@ public void transferToBaseWork(BaseWork baseWork) {
              allTypeInfoArray,
              dataColumnNumsArray,
              partitionColumnCount,
+              neededVirtualColumns,
              scratchTypeNameArray);

      baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
@@ -687,20 +716,41 @@ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np)
  }

  private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator,
-      List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList) {
+      List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList,
+      List<VirtualColumn> availableVirtualColumnList) {

-    // Add all non-virtual columns to make a vectorization context for
+    // Add all columns to make a vectorization context for
    // the TableScan operator.
    RowSchema rowSchema = tableScanOperator.getSchema();
    for (ColumnInfo c : rowSchema.getSignature()) {
-      // Validation will later exclude vectorization of virtual columns usage (HIVE-5560).
-      if (!isVirtualColumn(c)) {
-        String columnName = c.getInternalName();
-        String typeName = c.getTypeName();
-        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      // Validation will later exclude vectorization of virtual columns usage if necessary.
+      String columnName = c.getInternalName();
+
+      // Turns out partition columns get marked as virtual in ColumnInfo, so we need to
+      // check the VirtualColumn directly.
+      VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
+      if (virtualColumn == null) {
        logicalColumnNameList.add(columnName);
-        logicalTypeInfoList.add(typeInfo);
+        logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
+      } else {
+
+        // The planner gives us a subset of virtual columns available for this table scan.
+        // AND
+        // We only support some virtual columns in vectorization.
+        //
+        // So, create the intersection. Note these are available vectorizable virtual columns.
+        // Later we remember which virtual columns were *actually used* in the query so
+        // just those will be included in the Map VectorizedRowBatchCtx that has the
+        // information for creating the Map VectorizedRowBatch.
+        //
+        if (!vectorizableVirtualColumns.contains(virtualColumn)) {
+          continue;
+        }
+        if (virtualColumn == VirtualColumn.ROWID && !isVectorizedRowIdentifierEnabled) {
+          continue;
+        }
+        availableVirtualColumnList.add(virtualColumn);
      }
    }
  }
@@ -893,14 +943,19 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable
    boolean isAcidTable = tableScanOperator.getConf().isAcidTable();

    // These names/types are the data columns plus partition columns.
-    final List<String> allColumnNameList = new ArrayList<String>();
-    final List<TypeInfo> allTypeInfoList = new ArrayList<TypeInfo>();
+    final List<String> dataAndPartColumnNameList = new ArrayList<String>();
+    final List<TypeInfo> dataAndPartTypeInfoList = new ArrayList<TypeInfo>();
+
+    final List<VirtualColumn> availableVirtualColumnList = new ArrayList<VirtualColumn>();

-    getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList);
+    getTableScanOperatorSchemaInfo(
+        tableScanOperator,
+        dataAndPartColumnNameList, dataAndPartTypeInfoList,
+        availableVirtualColumnList);

    final List<Integer> dataColumnNums = new ArrayList<Integer>();

-    final int allColumnCount = allColumnNameList.size();
+    final int dataAndPartColumnCount = dataAndPartColumnNameList.size();

    /*
     * Validate input formats of all the partitions can be vectorized.
@@ -956,17 +1011,17 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable
        LinkedHashMap<String, String> partSpec = partDesc.getPartSpec();
        if (partSpec != null && partSpec.size() > 0) {
          partitionColumnCount = partSpec.size();
-          dataColumnCount = allColumnCount - partitionColumnCount;
+          dataColumnCount = dataAndPartColumnCount - partitionColumnCount;
        } else {
          partitionColumnCount = 0;
-          dataColumnCount = allColumnCount;
+          dataColumnCount = dataAndPartColumnCount;
        }

-        determineDataColumnNums(tableScanOperator, allColumnNameList, dataColumnCount,
+        determineDataColumnNums(tableScanOperator, dataAndPartColumnNameList, dataColumnCount,
            dataColumnNums);

-        tableDataColumnList = allColumnNameList.subList(0, dataColumnCount);
-        tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount);
+        tableDataColumnList = dataAndPartColumnNameList.subList(0, dataColumnCount);
+        tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, dataColumnCount);

        isFirst = false;
      }
@@ -1038,10 +1093,14 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable
      vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList);
    }

-    vectorTaskColumnInfo.setAllColumnNames(allColumnNameList);
-    vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList);
+    // For now, we don't know which virtual columns are going to be included. We'll add them
+    // later...
+    vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList);
+    vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList);
+
    vectorTaskColumnInfo.setDataColumnNums(dataColumnNums);
    vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount);
+    vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList);
    vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat);

    // Always set these so EXPLAIN can see.
@@ -1082,6 +1141,14 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask
      return false;
    }

+    // Set global member indicating which virtual columns are possible to be used by
+    // the Map vertex.
+    availableVectorizedVirtualColumnSet = new HashSet<VirtualColumn>();
+    availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList);
+
+    // And, use set to remember which virtual columns were actually referenced.
+    neededVirtualColumnSet = new HashSet<VirtualColumn>();
+
    // Now we are enabled and any issues found from here on out are considered
    // not vectorized issues.
    mapWork.setVectorizationEnabled(true);
@@ -1104,6 +1171,21 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask
        }
      }
    }
+
+    List<VirtualColumn> neededVirtualColumnList = new ArrayList<VirtualColumn>();
+    if (!neededVirtualColumnSet.isEmpty()) {
+
+      // Create needed in same order.
+      for (VirtualColumn virtualColumn : vectorTaskColumnInfo.availableVirtualColumnList) {
+        if (neededVirtualColumnSet.contains(virtualColumn)) {
+          neededVirtualColumnList.add(virtualColumn);
+          vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName());
+          vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo());
+        }
+      }
+    }
+
+    vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList);
    vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
    return true;
  }
@@ -1737,6 +1819,10 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE
        HiveConf.getBoolVar(hiveConf,
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED);

+    isVectorizedRowIdentifierEnabled =
+        HiveConf.getBoolVar(hiveConf,
+            HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED);
+
    isSchemaEvolution =
        HiveConf.getBoolVar(hiveConf,
            HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION);
@@ -2328,10 +2414,24 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi
      VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
    if (desc instanceof ExprNodeColumnDesc) {
      ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
-      // Currently, we do not support vectorized virtual columns (see HIVE-5570).
-      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
-        setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")");
-        return false;
+      String columnName = c.getColumn();
+
+      if (availableVectorizedVirtualColumnSet != null) {
+
+        // For Map, check for virtual columns.
+        VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
+        if (virtualColumn != null) {
+
+          // We support some virtual columns in vectorization for this table scan.
+
+          if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
+            setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
+            return false;
+          }
+
+          // Remember we used this one in the query.
+          neededVirtualColumnSet.add(virtualColumn);
+        }
      }
    }
    String typeName = desc.getTypeInfo().getTypeName();
@@ -4180,28 +4280,20 @@ private static VectorPTFInfo createVectorPTFInfo(Operator
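
For reference, the following is a minimal, hypothetical sketch (not part of the patch) of how the pieces above fit together: the new hive.vectorized.row.identifier.enabled flag, the extended VectorizedRowBatchCtx constructor that carries the needed virtual columns, and the resulting batch layout of data, partition, virtual, and scratch columns. The table schema used here (a long "key" column plus a string partition column) is invented purely for illustration.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowIdVectorizationSketch {
  public static void main(String[] args) {
    // The flag defaults to false; during planning the Vectorizer only treats ROW__ID
    // as an available virtual column when it is switched on.
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);

    // Row schema: one data column, one partition column, then the needed virtual
    // column (ROW__ID).  dataColumnCount is derived as 3 - 1 - 1 = 1.
    String[] names = { "key", "part_col", VirtualColumn.ROWID.getName() };
    TypeInfo[] types = {
        TypeInfoFactory.longTypeInfo,           // key
        TypeInfoFactory.stringTypeInfo,         // part_col
        VirtualColumn.ROWID.getTypeInfo() };    // ROW__ID struct
    VectorizedRowBatchCtx vrbCtx = new VectorizedRowBatchCtx(
        names, types, null /* include all data columns */, 1 /* partition columns */,
        new VirtualColumn[] { VirtualColumn.ROWID }, new String[0] /* no scratch columns */);

    // Batch layout: data columns first, then partition, then virtual, then scratch.
    VectorizedRowBatch batch = vrbCtx.createVectorizedRowBatch();
    int rowIdColumnNum = vrbCtx.getDataColumnCount() + vrbCtx.getPartitionColumnCount();

    // Same idea as VectorMapOperator.setRowIdentifierToNull(): until an ACID reader
    // fills ROW__ID in, mark the whole column as a repeating NULL.
    ColumnVector rowIdCol = batch.cols[rowIdColumnNum];
    rowIdCol.isNull[0] = true;
    rowIdCol.noNulls = false;
    rowIdCol.isRepeating = true;
  }
}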