diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9c954be..6c14f2d 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2848,6 +2848,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "of aggregations that use complex types.\n", "For example, AVG uses a complex type (STRUCT) for partial aggregation results" + "The default value is true."), + HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false, + "This flag should be set to true to enable vectorization\n" + + "of ROW__ID.\n" + + "The default value is false."), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 1cc0104..f793c54 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -354,6 +354,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_reduce2.q,\ vector_reduce3.q,\ vector_reduce_groupby_decimal.q,\ + vector_row__id.q,\ vector_string_concat.q,\ vector_struct_in.q,\ vector_udf_character_length.q,\ diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 22ca025..79ec4ed 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -190,8 +190,10 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept } } } + // UNDONE: Virtual column support? 
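For reference, a minimal sketch of how the new flag is read programmatically; it uses only HiveConf.getBoolVar (the same accessor the Vectorizer change further down uses), and the demo class itself is illustrative scaffolding that assumes a build containing this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

public class RowIdFlagDemo {
  public static void main(String[] args) {
    Configuration conf = new HiveConf();
    // New entry above; defaults to false, so ROW__ID vectorization stays off
    // unless a session sets hive.vectorized.row.identifier.enabled=true.
    boolean enabled = HiveConf.getBoolVar(conf,
        HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED);
    System.out.println("hive.vectorized.row.identifier.enabled = " + enabled);
  }
}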
return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]), - colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]); + colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, + new VirtualColumn[0], new String[0]); } static TableScanOperator findTsOp(MapWork mapWork) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java index ed50df2..1ac8914 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -125,6 +126,9 @@ private transient int dataColumnCount; private transient int partitionColumnCount; private transient Object[] partitionValues; + private transient int virtualColumnCount; + private transient boolean hasRowIdentifier; + private transient int rowIdentifierColumnNum; private transient boolean[] dataColumnsToIncludeTruncated; @@ -504,6 +508,19 @@ private void internalSetChildren(Configuration hconf) throws Exception { dataColumnCount = batchContext.getDataColumnCount(); partitionColumnCount = batchContext.getPartitionColumnCount(); partitionValues = new Object[partitionColumnCount]; + virtualColumnCount = batchContext.getVirtualColumnCount(); + rowIdentifierColumnNum = -1; + if (virtualColumnCount > 0) { + final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount; + VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns(); + hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID); + if (hasRowIdentifier) { + rowIdentifierColumnNum = firstVirtualColumnNum; + } + } else { + hasRowIdentifier = false; + } + dataColumnNums = batchContext.getDataColumnNums(); Preconditions.checkState(dataColumnNums != null); @@ -601,6 +618,13 @@ public void cleanUpInputFileChangedOp() throws HiveException { currentVectorPartContext.partName); } + private void setRowIdentifierToNull(VectorizedRowBatch batch) { + ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum]; + rowIdentifierColVector.isNull[0] = true; + rowIdentifierColVector.noNulls = false; + rowIdentifierColVector.isRepeating = true; + } + /* * Setup the context for reading from the next partition file. */ @@ -695,6 +719,12 @@ private void setupPartitionContextVars(String nominalPath) throws HiveException batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues); } + if (hasRowIdentifier) { + + // No ACID in this code path -- set ROW__ID to NULL. + setRowIdentifierToNull(deserializerBatch); + } + /* * Set or clear the rest of the reading variables based on {vector|row} deserialization.
*/ @@ -778,7 +808,16 @@ public void process(Writable value) throws HiveException { */ batchCounter++; if (value != null) { - numRows += ((VectorizedRowBatch) value).size; + VectorizedRowBatch batch = (VectorizedRowBatch) value; + numRows += batch.size; + if (hasRowIdentifier) { + + // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader + // UNDONE: Or, perhaps tell it to do it before calling us, ... + // UNDONE: For now, set column to NULL. + + setRowIdentifierToNull(batch); + } } oneRootOperator.process(value, 0); if (oneRootOperator.getDone()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 503bd0c..614147c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -124,6 +124,7 @@ import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -221,6 +222,7 @@ public VectorizationContext(String contextName, List initialColumnNames, projectedColumns.add(i); projectionColumnMap.put(projectionColumnNames.get(i), i); } + int firstOutputColumnIndex = projectedColumns.size(); this.ocm = new OutputColumnManager(firstOutputColumnIndex); this.firstOutputColumnIndex = firstOutputColumnIndex; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 3c12e04..90d1372 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -78,6 +79,8 @@ private int[] dataColumnNums; private int dataColumnCount; private int partitionColumnCount; + private int virtualColumnCount; + private VirtualColumn[] neededVirtualColumns; private String[] scratchColumnTypeNames; @@ -88,14 +91,17 @@ public VectorizedRowBatchCtx() { } public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos, - int[] dataColumnNums, int partitionColumnCount, String[] scratchColumnTypeNames) { + int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] neededVirtualColumns, + String[] scratchColumnTypeNames) { this.rowColumnNames = rowColumnNames; this.rowColumnTypeInfos = rowColumnTypeInfos; this.dataColumnNums = dataColumnNums; this.partitionColumnCount = partitionColumnCount; + this.neededVirtualColumns = neededVirtualColumns; + this.virtualColumnCount = neededVirtualColumns.length; this.scratchColumnTypeNames = scratchColumnTypeNames; - dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount; + dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - virtualColumnCount;
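For reference, setRowIdentifierToNull relies on the standard ColumnVector repeating convention: once isRepeating is set, entry 0 describes every row, so three assignments mark the whole batch NULL. A minimal sketch of that convention, using a LongColumnVector for brevity (ROW__ID itself is a STRUCT column); the demo class is illustrative:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RepeatingNullDemo {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    LongColumnVector col = new LongColumnVector();
    batch.cols[0] = col;
    batch.size = VectorizedRowBatch.DEFAULT_SIZE;

    // Same three assignments as setRowIdentifierToNull: every row in the
    // column now reads as NULL without touching the values array.
    col.isNull[0] = true;
    col.noNulls = false;
    col.isRepeating = true;

    // With isRepeating set, readers consult entry 0 for any row index.
    System.out.println("row 500 is null: " + (col.noNulls ? false : col.isNull[0]));
  }
}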
} public String[] getRowColumnNames() { @@ -118,6 +124,14 @@ public int getPartitionColumnCount() { return partitionColumnCount; } + public int getVirtualColumnCount() { + return virtualColumnCount; + } + + public VirtualColumn[] getNeededVirtualColumns() { + return neededVirtualColumns; + } + public String[] getScratchColumnTypeNames() { return scratchColumnTypeNames; } @@ -138,6 +152,8 @@ public void init(StructObjectInspector structObjectInspector, String[] scratchCo rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector); dataColumnNums = null; partitionColumnCount = 0; + virtualColumnCount = 0; + neededVirtualColumns = new VirtualColumn[0]; dataColumnCount = rowColumnTypeInfos.length; // Scratch column information. @@ -204,13 +220,14 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDes */ public VectorizedRowBatch createVectorizedRowBatch() { - final int dataAndPartColumnCount = rowColumnTypeInfos.length; - final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length; + final int nonScratchColumnCount = rowColumnTypeInfos.length; + final int totalColumnCount = + nonScratchColumnCount + scratchColumnTypeNames.length; VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount); if (dataColumnNums == null) { // All data and partition columns. - for (int i = 0; i < dataAndPartColumnCount; i++) { + for (int i = 0; i < nonScratchColumnCount; i++) { TypeInfo typeInfo = rowColumnTypeInfos[i]; result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); } @@ -218,24 +235,30 @@ public VectorizedRowBatch createVectorizedRowBatch() // Create only needed/included columns data columns. for (int i = 0; i < dataColumnNums.length; i++) { int columnNum = dataColumnNums[i]; - Preconditions.checkState(columnNum < dataAndPartColumnCount); + Preconditions.checkState(columnNum < nonScratchColumnCount); TypeInfo typeInfo = rowColumnTypeInfos[columnNum]; result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); } - // Always create partition columns. - final int endColumnNum = dataColumnCount + partitionColumnCount; - for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) { + // Always create partition and virtual columns. + final int partitionEndColumnNum = dataColumnCount + partitionColumnCount; + for (int partitionColumnNum = dataColumnCount; partitionColumnNum < partitionEndColumnNum; partitionColumnNum++) { TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum]; result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); } + final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount; + for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < virtualEndColumnNum; virtualColumnNum++) { + TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum]; + result.cols[virtualColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); + } } for (int i = 0; i < scratchColumnTypeNames.length; i++) { String typeName = scratchColumnTypeNames[i]; - result.cols[rowColumnTypeInfos.length + i] = + result.cols[nonScratchColumnCount + i] = VectorizedBatchUtil.createColumnVector(typeName); } + // UNDONE: Also remember virtualColumnCount... 
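For reference, after this change the batch columns laid out by VectorizedRowBatchCtx follow the order data, partition, virtual, scratch. A minimal arithmetic sketch; the counts are illustrative, while the index math mirrors createVectorizedRowBatch above and VectorMapOperator.internalSetChildren:

public class BatchLayoutDemo {
  public static void main(String[] args) {
    // Illustrative counts only.
    int dataColumnCount = 2;       // e.g. key:int, value:int
    int partitionColumnCount = 1;  // e.g. load_date:date
    int virtualColumnCount = 1;    // e.g. ROW__ID
    int scratchColumnCount = 3;

    // Virtual columns sit directly after data + partition columns.
    int firstVirtualColumnNum = dataColumnCount + partitionColumnCount;
    // Scratch columns come last.
    int firstScratchColumnNum = firstVirtualColumnNum + virtualColumnCount;
    int totalColumnCount = firstScratchColumnNum + scratchColumnCount;

    System.out.println("ROW__ID column index: " + firstVirtualColumnNum);  // 3
    System.out.println("first scratch column: " + firstScratchColumnNum); // 4
    System.out.println("total columns: " + totalColumnCount);             // 7
  }
}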
result.setPartitionInfo(dataColumnCount, partitionColumnCount); result.reset(); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index 2435bf1..0032305 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -20,10 +20,13 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.ListIterator; +import java.util.Map; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import org.apache.hadoop.conf.Configuration; @@ -64,6 +67,17 @@ ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(), RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName()); + public static final ImmutableMap VIRTUAL_COLUMN_NAME_MAP = + new ImmutableMap.Builder().putAll(getColumnNameMap()).build(); + + private static Map getColumnNameMap() { + Map map = new HashMap(); + for (VirtualColumn virtualColumn : values()) { + map.put(virtualColumn.name, virtualColumn); + } + return map; + } + private final String name; private final TypeInfo typeInfo; private final boolean isHidden; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 5f442a6..b3d38f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; @@ -212,6 +213,7 @@ import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.util.ReflectionUtils; +import com.google.common.collect.ImmutableSet; import com.google.common.base.Preconditions; public class Vectorizer implements PhysicalPlanResolver { @@ -257,6 +259,10 @@ private Set supportedAggregationUdfs = new HashSet(); + // The set of virtual columns that vectorized readers *MAY* support. 
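For reference, VIRTUAL_COLUMN_NAME_MAP in the hunk above is the usual index-an-enum-by-name pattern. A minimal self-contained sketch with a stand-in enum; Guava on the classpath is assumed, as in the patch:

import java.util.HashMap;
import java.util.Map;
import com.google.common.collect.ImmutableMap;

public class EnumNameMapDemo {
  enum Fruit { APPLE, PEAR, PLUM }

  // Build the index once; afterwards lookups are O(1) map gets instead
  // of scanning values() on every call.
  static final ImmutableMap<String, Fruit> NAME_MAP =
      new ImmutableMap.Builder<String, Fruit>().putAll(buildNameMap()).build();

  private static Map<String, Fruit> buildNameMap() {
    Map<String, Fruit> map = new HashMap<>();
    for (Fruit f : Fruit.values()) {
      map.put(f.name(), f);
    }
    return map;
  }

  public static void main(String[] args) {
    System.out.println(NAME_MAP.get("PEAR"));     // PEAR
    System.out.println(NAME_MAP.get("ROW__ID"));  // null -> not in this enum
  }
}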
+ public static final ImmutableSet vectorizableVirtualColumns = + ImmutableSet.of(VirtualColumn.ROWID); + private HiveConf hiveConf; private boolean useVectorizedInputFileFormat; @@ -265,6 +271,7 @@ private boolean isReduceVectorizationEnabled; private boolean isVectorizationComplexTypesEnabled; private boolean isVectorizationGroupByComplexTypesEnabled; + private boolean isVectorizedRowIdentifierEnabled; private boolean isSchemaEvolution; @@ -298,6 +305,9 @@ private void clearNotVectorizedReason() { private long vectorizedVertexNum = -1; + private Set availableVectorizedVirtualColumnSet = null; + private Set neededVirtualColumnSet = null; + public Vectorizer() { /* @@ -435,6 +445,8 @@ public Vectorizer() { List dataColumnNums; int partitionColumnCount; + List availableVirtualColumnList; + List neededVirtualColumnList; boolean useVectorizedInputFileFormat; boolean groupByVectorOutput; @@ -470,6 +482,12 @@ public void setDataColumnNums(List dataColumnNums) { public void setPartitionColumnCount(int partitionColumnCount) { this.partitionColumnCount = partitionColumnCount; } + public void setAvailableVirtualColumnList(List availableVirtualColumnList) { + this.availableVirtualColumnList = availableVirtualColumnList; + } + public void setNeededVirtualColumnList(List neededVirtualColumnList) { + this.neededVirtualColumnList = neededVirtualColumnList; + } public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } @@ -504,6 +522,16 @@ public void setReduceColumnNullOrder(String reduceColumnNullOrder) { public void transferToBaseWork(BaseWork baseWork) { + final int virtualColumnCount; + VirtualColumn[] neededVirtualColumns; + if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) { + virtualColumnCount = neededVirtualColumnList.size(); + neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]); + } else { + virtualColumnCount = 0; + neededVirtualColumns = new VirtualColumn[0]; + } + String[] allColumnNameArray = allColumnNames.toArray(new String[0]); TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]); int[] dataColumnNumsArray; @@ -519,6 +547,7 @@ public void transferToBaseWork(BaseWork baseWork) { allTypeInfoArray, dataColumnNumsArray, partitionColumnCount, + neededVirtualColumns, scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); @@ -669,20 +698,41 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) } private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator, - List logicalColumnNameList, List logicalTypeInfoList) { + List logicalColumnNameList, List logicalTypeInfoList, + List availableVirtualColumnList) { - // Add all non-virtual columns to make a vectorization context for + // Add all columns to make a vectorization context for // the TableScan operator. RowSchema rowSchema = tableScanOperator.getSchema(); for (ColumnInfo c : rowSchema.getSignature()) { - // Validation will later exclude vectorization of virtual columns usage (HIVE-5560). - if (!isVirtualColumn(c)) { - String columnName = c.getInternalName(); - String typeName = c.getTypeName(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + // Validation will later exclude vectorization of virtual columns usage if necessary. + String columnName = c.getInternalName(); + + // Turns out partition columns get marked as virtual in ColumnInfo, so we need to + // check the VirtualColumn directly. 
+ VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName); + if (virtualColumn == null) { logicalColumnNameList.add(columnName); - logicalTypeInfoList.add(typeInfo); + logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName())); + } else { + + // The planner gives us the subset of virtual columns available for this table scan. + // AND + // We only support some virtual columns in vectorization. + // + // So, create the intersection. Note these are the available vectorizable virtual columns. + // Later we remember which virtual columns were *actually used* in the query so + // just those will be included in the Map VectorizedRowBatchCtx that has the + // information for creating the Map VectorizedRowBatch. + // + if (!vectorizableVirtualColumns.contains(virtualColumn)) { + continue; + } + if (virtualColumn == VirtualColumn.ROWID && !isVectorizedRowIdentifierEnabled) { + continue; + } + availableVirtualColumnList.add(virtualColumn); } } } @@ -875,14 +925,19 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable boolean isAcidTable = tableScanOperator.getConf().isAcidTable(); // These names/types are the data columns plus partition columns. - final List allColumnNameList = new ArrayList(); - final List allTypeInfoList = new ArrayList(); + final List dataAndPartColumnNameList = new ArrayList(); + final List dataAndPartTypeInfoList = new ArrayList(); + + final List availableVirtualColumnList = new ArrayList(); - getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList); + getTableScanOperatorSchemaInfo( + tableScanOperator, + dataAndPartColumnNameList, dataAndPartTypeInfoList, + availableVirtualColumnList); final List dataColumnNums = new ArrayList(); - final int allColumnCount = allColumnNameList.size(); + final int dataAndPartColumnCount = dataAndPartColumnNameList.size(); /* * Validate input formats of all the partitions can be vectorized. @@ -938,17 +993,17 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable LinkedHashMap partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { partitionColumnCount = partSpec.size(); - dataColumnCount = allColumnCount - partitionColumnCount; + dataColumnCount = dataAndPartColumnCount - partitionColumnCount; } else { partitionColumnCount = 0; - dataColumnCount = allColumnCount; + dataColumnCount = dataAndPartColumnCount; } - determineDataColumnNums(tableScanOperator, allColumnNameList, dataColumnCount, + determineDataColumnNums(tableScanOperator, dataAndPartColumnNameList, dataColumnCount, dataColumnNums); - tableDataColumnList = allColumnNameList.subList(0, dataColumnCount); - tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount); + tableDataColumnList = dataAndPartColumnNameList.subList(0, dataColumnCount); + tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, dataColumnCount); isFirst = false; } @@ -1020,10 +1075,14 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList); } - vectorTaskColumnInfo.setAllColumnNames(allColumnNameList); - vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList); + // For now, we don't know which virtual columns are going to be included. We'll add them + // later...
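For reference, the bookkeeping described in the comment above reduces to an intersection plus an ordered filter. A minimal sketch with strings standing in for VirtualColumn values; the step comments point at the methods in this patch:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class VirtualColumnBookkeepingDemo {
  public static void main(String[] args) {
    // What the planner says the table scan can produce (stand-in names).
    List<String> availableFromPlanner =
        Arrays.asList("ROW__ID", "INPUT__FILE__NAME", "GROUPING__ID");
    // What the vectorizer may support (cf. vectorizableVirtualColumns).
    Set<String> vectorizable = new HashSet<>(Arrays.asList("ROW__ID"));

    // Step 1 (getTableScanOperatorSchemaInfo): keep only the intersection.
    List<String> available = new ArrayList<>();
    for (String vc : availableFromPlanner) {
      if (vectorizable.contains(vc)) {
        available.add(vc);
      }
    }

    // Step 2 (validateExprNodeDescRecursive): record what the query touches.
    Set<String> needed = new HashSet<>();
    needed.add("ROW__ID");

    // Step 3 (validateMapWork): emit the needed columns in available order.
    List<String> neededOrdered = new ArrayList<>();
    for (String vc : available) {
      if (needed.contains(vc)) {
        neededOrdered.add(vc);
      }
    }
    System.out.println(neededOrdered);  // [ROW__ID]
  }
}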
+ vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList); + vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList); + vectorTaskColumnInfo.setDataColumnNums(dataColumnNums); vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount); + vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList); vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); // Always set these so EXPLAIN can see. @@ -1064,6 +1123,14 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask return false; } + // Set the global member indicating which virtual columns may be used by + // the Map vertex. + availableVectorizedVirtualColumnSet = new HashSet(); + availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList); + + // And, use a set to remember which virtual columns were actually referenced. + neededVirtualColumnSet = new HashSet(); + // Now we are enabled and any issues found from here on out are considered // not vectorized issues. mapWork.setVectorizationEnabled(true); @@ -1086,6 +1153,21 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask } } } + + List neededVirtualColumnList = new ArrayList(); + if (!neededVirtualColumnSet.isEmpty()) { + + // Create the needed list in the same order as the available list. + for (VirtualColumn virtualColumn : vectorTaskColumnInfo.availableVirtualColumnList) { + if (neededVirtualColumnSet.contains(virtualColumn)) { + neededVirtualColumnList.add(virtualColumn); + vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName()); + vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo()); + } + } + } + + vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList); vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps()); return true; } @@ -1716,6 +1798,10 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED); + isVectorizedRowIdentifierEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED); + isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -2139,10 +2225,24 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi VectorExpressionDescriptor.Mode mode, boolean allowComplex) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; - // Currently, we do not support vectorized virtual columns (see HIVE-5570). - if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) { - setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")"); - return false; + String columnName = c.getColumn(); + + if (availableVectorizedVirtualColumnSet != null) { + + // For Map, check for virtual columns. + VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName); + if (virtualColumn != null) { + + // We support some virtual columns in vectorization for this table scan. + + if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) { + setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported"); + return false; + } + + // Remember we used this one in the query.
+ neededVirtualColumnSet.add(virtualColumn); + } } } String typeName = desc.getTypeInfo().getTypeName(); @@ -3662,28 +3762,20 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { return vectorOp; } - private boolean isVirtualColumn(ColumnInfo column) { - - // Not using method column.getIsVirtualCol() because partitioning columns are also - // treated as virtual columns in ColumnInfo. - if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) { - return true; - } - return false; - } - public void debugDisplayAllMaps(BaseWork work) { VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx(); String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); - Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); + TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount(); + int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount(); String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames(); - LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames)); - LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos)); + LOG.debug("debugDisplayAllMaps rowColumnNames " + Arrays.toString(allColumnNames)); + LOG.debug("debugDisplayAllMaps rowColumnTypeInfos " + Arrays.toString(columnTypeInfos)); LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount); + LOG.debug("debugDisplayAllMaps virtualColumnCount " + virtualColumnCount); LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames)); } } diff --git ql/src/test/queries/clientpositive/vector_row__id.q ql/src/test/queries/clientpositive/vector_row__id.q new file mode 100644 index 0000000..11eda90 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_row__id.q @@ -0,0 +1,56 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +-- SORT_QUERY_RESULTS + +drop table if exists hello_acid; +create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true'); + +insert into hello_acid partition (load_date='2016-03-01') values (1, 1); +insert into hello_acid partition (load_date='2016-03-02') values (2, 2); +insert into hello_acid partition (load_date='2016-03-03') values (3, 3); + +set hive.vectorized.row.identifier.enabled=false; + +explain vectorization detail +select row__id, key, value from hello_acid order by key; + +select row__id, key, value from hello_acid order by key; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + + + +set hive.vectorized.row.identifier.enabled=true; + +explain vectorization detail +select row__id, key, value from hello_acid order by key; + +select row__id, key, 
value from hello_acid order by key; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + diff --git ql/src/test/results/clientpositive/llap/vector_row__id.q.out ql/src/test/results/clientpositive/llap/vector_row__id.q.out new file mode 100644 index 0000000..850e3a4 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_row__id.q.out @@ -0,0 +1,605 @@ +PREHOOK: query: drop table if exists hello_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists hello_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hello_acid +POSTHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hello_acid +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct), key (type: int), value (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col2 (type: int) + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct, VALUE._col1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: struct), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: 
default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +{"transactionid":3,"bucketid":536870912,"rowid":0} 1 1 +{"transactionid":4,"bucketid":536870912,"rowid":0} 2 2 +{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from 
(select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 3) (type: boolean) + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +PREHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here 
#### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: ROW__ID (type: struct), key (type: int), value (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [3, 1] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 1 + partitionColumns: load_date:date + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct, VALUE._col1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: struct), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: 
default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +NULL 1 1 +NULL 2 2 +NULL 3 3 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 3) (type: boolean) + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 diff --git ql/src/test/results/clientpositive/vector_row__id.q.out ql/src/test/results/clientpositive/vector_row__id.q.out new file mode 100644 index 0000000..d48902b --- /dev/null +++ ql/src/test/results/clientpositive/vector_row__id.q.out @@ -0,0 +1,491 @@ +PREHOOK: query: drop table if exists hello_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists hello_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@hello_acid
+POSTHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hello_acid
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+{"transactionid":3,"bucketid":536936448,"rowid":0}	1	1
+{"transactionid":4,"bucketid":537001984,"rowid":0}	2	2
+{"transactionid":5,"bucketid":536870912,"rowid":0}	3	3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: ROW__ID.transactionid (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: bigint)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: bigint)
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (ROW__ID.transactionid = 3) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ROW__ID.transactionid (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported
+          vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
+            Select Operator
+              expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [3, 0, 1]
+              Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:int
+              partitionColumnCount: 1
+              partitionColumns: load_date:date
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+NULL	1	1
+NULL	2	2
+NULL	3	3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: ROW__ID.transactionid (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: bigint)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: bigint)
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (ROW__ID.transactionid = 3) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ROW__ID.transactionid (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc
+          vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
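
Note on the bucketid values in the first result set above: they are encoded, not raw bucket numbers. The following Java sketch decodes them under an assumption inferred purely from the three observed rows, namely the layout (1 << 29) | (bucket << 16), which maps 536870912 -> 0, 536936448 -> 1, and 537001984 -> 2. The class and method names are hypothetical and the bit layout is an observation from this output, not taken from a Hive API.

import java.util.LinkedHashMap;
import java.util.Map;

// Sketch only: decodes the bucketid values printed in the ROW__ID structs
// above. Assumed layout (inferred from the data, not from a Hive API):
// the low 16 bits are unused here, bits 16..27 carry the bucket number,
// and bit 29 appears to be a constant marker bit.
public class BucketIdDecodeSketch {

  static int decodeBucket(int encodedBucketId) {
    // Drop the low 16 bits, then mask off the marker bit that remains
    // above bit 11 after the shift.
    return (encodedBucketId >>> 16) & 0x0FFF;
  }

  public static void main(String[] args) {
    // key -> encoded bucketid, copied from the query results above.
    Map<Integer, Integer> observed = new LinkedHashMap<>();
    observed.put(1, 536936448);
    observed.put(2, 537001984);
    observed.put(3, 536870912);

    for (Map.Entry<Integer, Integer> e : observed.entrySet()) {
      // With "clustered by(key) into 3 buckets", an int key hashes to its
      // own value, so the expected bucket is key % 3.
      System.out.printf("key=%d encoded=%d decoded bucket=%d expected=%d%n",
          e.getKey(), e.getValue(), decodeBucket(e.getValue()), e.getKey() % 3);
    }
  }
}

The decoded buckets line up with the table's clustered by(key) into 3 buckets spec: keys 1, 2, and 3 land in buckets 1, 2, and 0 respectively, which is exactly what the encoded values show.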