diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 36dc95a..f2af438 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -156,6 +156,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_where_partitioned.q,\
   update_two_cols.q,\
   vector_between_in.q,\
+  vector_bucket.q,\
   vector_cast_constant.q,\
   vector_char_4.q,\
   vector_char_simple.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 5bdeb92..b68de19 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -429,20 +429,20 @@ private static BaseWork getBaseWork(Configuration conf, String name) {
     }
   }

-  public static Map<String, Map<Integer, String>> getScratchColumnVectorTypes(Configuration hiveConf) {
+  public static Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
     BaseWork baseWork = getMapWork(hiveConf);
     if (baseWork == null) {
       baseWork = getReduceWork(hiveConf);
     }
-    return baseWork.getScratchColumnVectorTypes();
+    return baseWork.getAllScratchColumnVectorTypeMaps();
   }

-  public static Map<String, Map<String, Integer>> getScratchColumnMap(Configuration hiveConf) {
+  public static Map<String, Map<String, Integer>> getAllColumnVectorMaps(Configuration hiveConf) {
     BaseWork baseWork = getMapWork(hiveConf);
     if (baseWork == null) {
       baseWork = getReduceWork(hiveConf);
     }
-    return baseWork.getScratchColumnMap();
+    return baseWork.getAllColumnVectorMaps();
   }

   public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
index 8e3c2fe..8910af6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
@@ -101,7 +101,7 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep
       sources[tag] = new ReduceRecordSource();
       sources[tag].init(jconf, reducer, redWork.getVectorMode(), keyTableDesc, valueTableDesc,
           reader, tag == position, (byte) tag,
-          redWork.getScratchColumnVectorTypes());
+          redWork.getAllScratchColumnVectorTypeMaps());
       ois[tag] = sources[tag].getObjectInspector();
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
index 1ddcbc6..7f4bb64 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
@@ -18,85 +18,118 @@
 package org.apache.hadoop.hive.ql.exec.vector;

-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
+import java.util.ArrayList;

 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

 /**
- * Vectorized extract operator implementation.  Consumes rows and outputs a
- * vectorized batch of subobjects.
+ * Vectorized extract operator implementation.
 **/
-public class VectorExtractOperator extends ExtractOperator {
+public class VectorExtractOperator extends ExtractOperator implements VectorizationContextRegion {
   private static final long serialVersionUID = 1L;

-  private int keyColCount;
-  private int valueColCount;
-
-  private transient VectorizedRowBatch outputBatch;
-  private transient int remainingColCount;
+  private List<TypeInfo> reduceTypeInfos;
+
+  // Create a new outgoing vectorization context because we will project just the values.
+  private VectorizationContext vOutContext;
+
+  private int[] projectedColumns;
+
+  private String removeValueDotPrefix(String columnName) {
+    return columnName.substring("VALUE.".length());
+  }

   public VectorExtractOperator(VectorizationContext vContext, OperatorDesc conf)
       throws HiveException {
     this();
     this.conf = (ExtractDesc) conf;
+
+    List<String> reduceColumnNames = vContext.getProjectionColumnNames();
+    int reduceColCount = reduceColumnNames.size();
+
+    /*
+     * Create a new vectorization context as a projection of just the value columns; the
+     * same output column manager must be inherited to track the scratch columns.
+     */
+    vOutContext = new VectorizationContext(vContext);
+
+    // Set a fileKey with vectorization context.
+    vOutContext.setFileKey(vContext.getFileKey() + "/_EXTRACT_");
+
+    // Remove "VALUE." prefix from value columns and create a new projection
+    vOutContext.resetProjectionColumns();
+    for (int i = 0; i < reduceColCount; i++) {
+      String columnName = reduceColumnNames.get(i);
+      if (columnName.startsWith("VALUE.")) {
+        vOutContext.addProjectionColumn(removeValueDotPrefix(columnName), i);
+      }
+    }
   }

   public VectorExtractOperator() {
     super();
   }

+  /*
+   * Called by the Vectorizer class to pass the types from reduce shuffle.
+   */
+  public void setReduceTypeInfos(List<TypeInfo> reduceTypeInfos) {
+    this.reduceTypeInfos = reduceTypeInfos;
+  }
+
   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
-    StructObjectInspector structInputObjInspector = (StructObjectInspector) inputObjInspectors[0];
-    List<? extends StructField> fields = structInputObjInspector.getAllStructFieldRefs();
-    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-    ArrayList<String> colNames = new ArrayList<String>();
-    for (int i = keyColCount; i < fields.size(); i++) {
-      StructField field = fields.get(i);
-      String fieldName = field.getFieldName();
-
-      // Remove "VALUE." prefix.
-      int dotIndex = fieldName.indexOf(".");
-      colNames.add(fieldName.substring(dotIndex + 1));
-      ois.add(field.getFieldObjectInspector());
+    // Create the projection of the values and the output object inspector
+    // for just the values, without their "VALUE." prefix.
+    int projectionSize = vOutContext.getProjectedColumns().size();
+    projectedColumns = new int[projectionSize];
+    List<String> columnNames = new ArrayList<String>();
+    List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+    for (int i = 0; i < projectionSize; i++) {
+      int projectedIndex = vOutContext.getProjectedColumns().get(i);
+      projectedColumns[i] = projectedIndex;
+      String colName = vOutContext.getProjectionColumnNames().get(i);
+      columnNames.add(colName);
+      TypeInfo typeInfo = reduceTypeInfos.get(projectedIndex);
+      ObjectInspector oi = TypeInfoUtils
+          .getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
+      ois.add(oi);
     }
-    outputObjInspector = ObjectInspectorFactory
-        .getStandardStructObjectInspector(colNames, ois);
-    remainingColCount = fields.size() - keyColCount;
-    outputBatch = new VectorizedRowBatch(remainingColCount);
+    outputObjInspector = ObjectInspectorFactory.
+        getStandardStructObjectInspector(columnNames, ois);
     initializeChildren(hconf);
   }

-  public void setKeyAndValueColCounts(int keyColCount, int valueColCount) {
-    this.keyColCount = keyColCount;
-    this.valueColCount = valueColCount;
-  }

   @Override
   // Remove the key columns and forward the values (and scratch columns).
   public void processOp(Object row, int tag) throws HiveException {
-    VectorizedRowBatch inputBatch = (VectorizedRowBatch) row;
+    VectorizedRowBatch vrg = (VectorizedRowBatch) row;

-    // Copy references to the input columns array starting after the keys...
-    for (int i = 0; i < remainingColCount; i++) {
-      outputBatch.cols[i] = inputBatch.cols[keyColCount + i];
-    }
-    outputBatch.size = inputBatch.size;
+    int[] originalProjections = vrg.projectedColumns;
+    int originalProjectionSize = vrg.projectionSize;

-    forward(outputBatch, outputObjInspector);
+    // Temporarily substitute our projection.
+    vrg.projectionSize = projectedColumns.length;
+    vrg.projectedColumns = projectedColumns;
+
+    forward(vrg, null);
+
+    // Revert the projected columns back, because vrg will be re-used.
+    vrg.projectionSize = originalProjectionSize;
+    vrg.projectedColumns = originalProjections;
+  }
+
+  @Override
+  public VectorizationContext getOuputVectorizationContext() {
+    return vOutContext;
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
index ea32f33..858604c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

 /**
@@ -50,10 +51,22 @@ public VectorFileSinkOperator() {

   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
-    super.initializeOp(hconf);
-    valueWriters = VectorExpressionWriterFactory.getExpressionWriters(
-        (StructObjectInspector) inputObjInspectors[0]);
+    // We need an input object inspector that is for the row we will extract out of the
+    // vectorized row batch, not, for example, an original inspector for an ORC table, etc.
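The processOp() rewrite above works because a VectorizedRowBatch is always read through its projection arrays. A minimal illustrative reading of that contract (a sketch, not code from this patch):

    // Sketch: a downstream consumer resolves logical column i through the batch's
    // projection, so temporarily swapping projectedColumns/projectionSize is enough
    // to hide the KEY columns and expose only the VALUE and scratch columns.
    for (int i = 0; i < vrg.projectionSize; i++) {
      ColumnVector cv = vrg.cols[vrg.projectedColumns[i]];
      // ... process cv as logical output column i ...
    }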
+    VectorExpressionWriterFactory.processVectorInspector(
+        (StructObjectInspector) inputObjInspectors[0],
+        new VectorExpressionWriterFactory.SingleOIDClosure() {
+          @Override
+          public void assign(VectorExpressionWriter[] writers,
+              ObjectInspector objectInspector) {
+            valueWriters = writers;
+            inputObjInspectors[0] = objectInspector;
+          }
+        });
     singleRow = new Object[valueWriters.length];
+
+    // Call FileSinkOperator with new input inspector.
+    super.initializeOp(hconf);
   }

   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 86b60ab..856ff20 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -32,11 +32,8 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.KeyWrapper;
-import org.apache.hadoop.hive.ql.exec.KeyWrapperFactory;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
@@ -46,12 +43,9 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -760,13 +754,7 @@ public VectorGroupByOperator(VectorizationContext vContext, OperatorDesc conf)

     isVectorOutput = desc.getVectorDesc().isVectorOutput();

-    List<String> outColNames = desc.getOutputColumnNames();
-    Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
-    int outColIndex = 0;
-    for(String outCol: outColNames) {
-      mapOutCols.put(outCol, outColIndex++);
-    }
-    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+    vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/_GROUPBY_");
     fileKey = vOutContext.getFileKey();
   }
@@ -811,7 +799,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
         vrbCtx.init(hconf, fileKey, (StructObjectInspector) outputObjInspector);
         outputBatch = vrbCtx.createVectorizedRowBatch();
         vectorColumnAssign = VectorColumnAssignFactory.buildAssigners(
-            outputBatch, outputObjInspector, vOutContext.getColumnMap(), conf.getOutputColumnNames());
+            outputBatch, outputObjInspector, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
       }
     } catch (HiveException he) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
index 30e36ac..5fda5dd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
@@ -28,11 +28,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
@@ -41,8 +37,6 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -116,16 +110,8 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf)
     Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
     bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));

-    List<String> outColNames = desc.getOutputColumnNames();
-
-    Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
-
-    int outColIndex = 0;
-    for(String outCol: outColNames) {
-      mapOutCols.put(outCol, outColIndex++);
-    }
-
-    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+    // We are making a new output vectorized row batch.
+    vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias());
     this.fileKey = vOutContext.getFileKey();
   }
@@ -207,7 +193,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive
     Object[] values = (Object[]) row;
     VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
     if (null == vcas) {
-      Map<String, Map<String, Integer>> allColumnMaps = Utilities.getScratchColumnMap(hconf);
+      Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
       Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
       vcas = VectorColumnAssignFactory.buildAssigners(
           outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index 0591531..22ffb6e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -27,16 +27,13 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.JoinUtil;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -116,19 +113,9 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf
     Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
     bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
-
-    // Vectorized join operators need to create a new vectorization region for child operators.
-
-    List<String> outColNames = desc.getOutputColumnNames();
-
-    Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
-
-    int outColIndex = 0;
-    for(String outCol: outColNames) {
-      mapOutCols.put(outCol, outColIndex++);
-    }
-    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+    // We are making a new output vectorized row batch.
+    vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
     this.fileKey = vOutContext.getFileKey();
   }
@@ -285,7 +272,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive
     Object[] values = (Object[]) row;
     VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
     if (null == vcas) {
-      Map<String, Map<String, Integer>> allColumnMaps = Utilities.getScratchColumnMap(hconf);
+      Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
       Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
       vcas = VectorColumnAssignFactory.buildAssigners(
           outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
index a65e594..458dc5a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
@@ -31,7 +31,6 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -63,20 +62,20 @@ public VectorSelectOperator(VectorizationContext vContext, OperatorDesc conf)
     }

     /**
-     * Create a new vectorization context to update the column map but same output column manager
-     * must be inherited to track the scratch the columns.
+     * Create a new vectorization context to create a new projection, but keep the
+     * same output column manager, which must be inherited to track the scratch columns.
      */
     vOutContext = new VectorizationContext(vContext);

     // Set a fileKey, although this operator doesn't use it.
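Both vectorized join operators above now derive their output context directly from desc.getOutputColumnNames(). A short sketch of what that one-argument constructor provides, assuming the VectorizationContext changes later in this patch and purely illustrative column names:

    List<String> outputColumnNames = Arrays.asList("_col0", "_col1");   // illustrative names
    VectorizationContext vOutContext = new VectorizationContext(outputColumnNames);
    // getProjectionColumnMap() is now {_col0=0, _col1=1}; internalForward() later retrieves
    // the same map at run time via Utilities.getAllColumnVectorMaps(hconf).get(fileKey).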
     vOutContext.setFileKey(vContext.getFileKey() + "/_SELECT_");

-    // Update column map
-    vOutContext.getColumnMap().clear();
+    vOutContext.resetProjectionColumns();
     for (int i=0; i < colList.size(); ++i) {
       String columnName = this.conf.getOutputColumnNames().get(i);
       VectorExpression ve = vExpressions[i];
-      vOutContext.addToColumnMap(columnName, ve.getOutputColumn());
+      vOutContext.addProjectionColumn(columnName,
+          ve.getOutputColumn());
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index fdd43e7..42ad37d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -23,11 +23,13 @@
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.regex.Pattern;

 import org.apache.commons.logging.Log;
@@ -123,9 +125,98 @@
   VectorExpressionDescriptor vMap;

+  private List<Integer> projectedColumns;
+  private List<String> projectionColumnNames;
+  private Map<String, Integer> projectionColumnMap;
+
   //columnName to column position map
-  private final Map<String, Integer> columnMap;
-  private final int firstOutputColumnIndex;
+  // private final Map<String, Integer> columnMap;
+  private int firstOutputColumnIndex;
+
+  // Convenient constructor for initial batch creation; takes
+  // a list of column names and maps them to 0..n-1 indices.
+  public VectorizationContext(List<String> initialColumnNames) {
+    this.projectionColumnNames = initialColumnNames;
+
+    projectedColumns = new ArrayList<Integer>();
+    projectionColumnMap = new HashMap<String, Integer>();
+    for (int i = 0; i < this.projectionColumnNames.size(); i++) {
+      projectedColumns.add(i);
+      projectionColumnMap.put(projectionColumnNames.get(i), i);
+    }
+    int firstOutputColumnIndex = projectedColumns.size();
+    this.ocm = new OutputColumnManager(firstOutputColumnIndex);
+    this.firstOutputColumnIndex = firstOutputColumnIndex;
+    vMap = new VectorExpressionDescriptor();
+  }
+
+  // Constructor to be used with the individual addInitialColumn method,
+  // followed by a call to finishedAddingInitialColumns.
+  public VectorizationContext() {
+    projectedColumns = new ArrayList<Integer>();
+    projectionColumnNames = new ArrayList<String>();
+    projectionColumnMap = new HashMap<String, Integer>();
+    this.ocm = new OutputColumnManager(0);
+    this.firstOutputColumnIndex = 0;
+    vMap = new VectorExpressionDescriptor();
+  }
+
+  // Constructor useful for making a projection vectorization context.
+  // Use with resetProjectionColumns and addProjectionColumn.
+  // Keeps the existing output column manager, etc.
+  public VectorizationContext(VectorizationContext vContext) {
+    this.projectedColumns = new ArrayList<Integer>();
+    this.projectionColumnNames = new ArrayList<String>();
+    this.projectionColumnMap = new HashMap<String, Integer>();
+
+    this.ocm = vContext.ocm;
+    this.firstOutputColumnIndex = vContext.firstOutputColumnIndex;
+    vMap = new VectorExpressionDescriptor();
+  }
+
+  // Add an initial column to a vectorization context when
+  // a vectorized row batch is being created.
+  public void addInitialColumn(String columnName) {
+    int index = projectedColumns.size();
+    projectedColumns.add(index);
+    projectionColumnNames.add(columnName);
+    projectionColumnMap.put(columnName, index);
+  }
+
+  // Finishes the vectorization context after all the initial
+  // columns have been added.
+  public void finishedAddingInitialColumns() {
+    int firstOutputColumnIndex = projectedColumns.size();
+    this.ocm = new OutputColumnManager(firstOutputColumnIndex);
+    this.firstOutputColumnIndex = firstOutputColumnIndex;
+  }
+
+  // Empties the projection columns.
+  public void resetProjectionColumns() {
+    projectedColumns = new ArrayList<Integer>();
+    projectionColumnNames = new ArrayList<String>();
+    projectionColumnMap = new HashMap<String, Integer>();
+  }
+
+  // Add a projection column to a projection vectorization context.
+  public void addProjectionColumn(String columnName, int vectorBatchColIndex) {
+    projectedColumns.add(vectorBatchColIndex);
+    projectionColumnNames.add(columnName);
+    projectionColumnMap.put(columnName, vectorBatchColIndex);
+  }
+
+  public List<Integer> getProjectedColumns() {
+    return projectedColumns;
+  }
+
+  public List<String> getProjectionColumnNames() {
+    return projectionColumnNames;
+  }
+
+  public Map<String, Integer> getProjectionColumnMap() {
+    return projectionColumnMap;
+  }
+
   public static final Pattern decimalTypePattern = Pattern.compile("decimal.*",
       Pattern.CASE_INSENSITIVE);
@@ -140,7 +231,7 @@
       Pattern.CASE_INSENSITIVE);

   //Map column number to type
-  private final OutputColumnManager ocm;
+  private OutputColumnManager ocm;

   // File key is used by operators to retrieve the scratch vectors
   // from mapWork at runtime. The operators that modify the structure of
@@ -170,27 +261,6 @@
     castExpressionUdfs.add(UDFToShort.class);
   }

-  public VectorizationContext(Map<String, Integer> columnMap,
-      int initialOutputCol) {
-    this.columnMap = columnMap;
-    this.ocm = new OutputColumnManager(initialOutputCol);
-    this.firstOutputColumnIndex = initialOutputCol;
-    vMap = new VectorExpressionDescriptor();
-  }
-
-  /**
-   * This constructor inherits the OutputColumnManger and from
-   * the 'parent' constructor, therefore this should be used only by operators
-   * that don't create a new vectorized row batch. This should be used only by
-   * operators that want to modify the columnName map without changing the row batch.
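Taken together, the copy constructor and the projection methods above are meant to be used the way VectorSelectOperator and VectorExtractOperator use them earlier in this patch. A compact sketch with made-up column names and indices:

    // The child context shares the parent's OutputColumnManager (scratch columns)
    // but exposes its own projection over the same physical batch.
    VectorizationContext vOutContext = new VectorizationContext(vContext);
    vOutContext.setFileKey(vContext.getFileKey() + "/_SELECT_");

    vOutContext.resetProjectionColumns();
    vOutContext.addProjectionColumn("a_plus_b", 5);   // 5 = scratch column of an expression
    vOutContext.addProjectionColumn("c", 2);          // 2 = an original input column
    // getProjectedColumns() -> [5, 2]; getProjectionColumnMap() -> {a_plus_b=5, c=2}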
-   */
-  public VectorizationContext(VectorizationContext parent) {
-    this.columnMap = new HashMap<String, Integer>(parent.columnMap);
-    this.ocm = parent.ocm;
-    this.firstOutputColumnIndex = parent.firstOutputColumnIndex;
-    vMap = new VectorExpressionDescriptor();
-  }
-
   public String getFileKey() {
     return fileKey;
   }
@@ -199,16 +269,19 @@ public void setFileKey(String fileKey) {
     this.fileKey = fileKey;
   }

-  protected int getInputColumnIndex(String name) {
-    if (!columnMap.containsKey(name)) {
-      LOG.error(String.format("The column %s is not in the vectorization context column map %s.",
-          name, columnMap.toString()));
+  protected int getInputColumnIndex(String name) throws HiveException {
+    if (name == null) {
+      throw new HiveException("Null column name");
+    }
+    if (!projectionColumnMap.containsKey(name)) {
+      throw new HiveException(String.format("The column %s is not in the vectorization context column map %s.",
+          name, projectionColumnMap.toString()));
     }
-    return columnMap.get(name);
+    return projectionColumnMap.get(name);
   }

   protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
-    return columnMap.get(colExpr.getColumn());
+    return projectionColumnMap.get(colExpr.getColumn());
   }

   private static class OutputColumnManager {
@@ -280,7 +353,7 @@ void freeOutputColumn(int index) {
   }

   private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc
-      exprDesc, Mode mode) {
+      exprDesc, Mode mode) throws HiveException {
     int columnNum = getInputColumnIndex(exprDesc.getColumn());
     VectorExpression expr = null;
     switch (mode) {
@@ -1988,7 +2061,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
         "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")");
   }

-  public Map<Integer, String> getOutputColumnTypeMap() {
+  public Map<Integer, String> getScratchColumnTypeMap() {
     Map<Integer, String> map = new HashMap<Integer, String>();
     for (int i = 0; i < ocm.outputColCount; i++) {
       String type = ocm.outputColumnsTypes[i];
@@ -1997,15 +2070,26 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
     return map;
   }

-  public Map<String, Integer> getColumnMap() {
-    return columnMap;
-  }
+  public String toString() {
+    StringBuilder sb = new StringBuilder(32);
+    sb.append("Context key ").append(getFileKey()).append(", ");
+
+    Comparator<Integer> comparerInteger = new Comparator<Integer>() {
+      @Override
+      public int compare(Integer o1, Integer o2) {
+        return o1.compareTo(o2);
+      }};

-  public void addToColumnMap(String columnName, int outputColumn) throws HiveException {
-    if (columnMap.containsKey(columnName) && (columnMap.get(columnName) != outputColumn)) {
-      throw new HiveException(String.format("Column %s is already mapped to %d. Cannot remap to %d.",
-          columnName, columnMap.get(columnName), outputColumn));
+    Map<Integer, String> sortedColumnMap = new TreeMap<Integer, String>(comparerInteger);
+    for (Map.Entry<String, Integer> entry : projectionColumnMap.entrySet()) {
+      sortedColumnMap.put(entry.getValue(), entry.getKey());
     }
-    columnMap.put(columnName, outputColumn);
+    sb.append("sortedProjectionColumnMap ").append(sortedColumnMap).append(", ");
+
+    Map<Integer, String> sortedScratchColumnTypeMap = new TreeMap<Integer, String>(comparerInteger);
+    sortedScratchColumnTypeMap.putAll(getScratchColumnTypeMap());
+    sb.append("sortedScratchColumnTypeMap ").append(sortedScratchColumnTypeMap);
+
+    return sb.toString();
   }
-
-}
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 81a8ee7..4f57aac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -139,7 +139,7 @@ public VectorizedRowBatchCtx() {
   public void init(Configuration hiveConf, String fileKey,
       StructObjectInspector rowOI) {
     Map<String, Map<Integer, String>> scratchColumnVectorTypes =
-        Utilities.getScratchColumnVectorTypes(hiveConf);
+        Utilities.getAllScratchColumnVectorTypeMaps(hiveConf);
     columnTypeMap = scratchColumnVectorTypes.get(fileKey);
     this.rowOI= rowOI;
     this.rawRowOI = rowOI;
@@ -190,7 +190,7 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx
     String partitionPath = split.getPath().getParent().toString();
     columnTypeMap = Utilities
-        .getScratchColumnVectorTypes(hiveConf)
+        .getAllScratchColumnVectorTypeMaps(hiveConf)
         .get(partitionPath);

     Properties partProps =
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 786d6b7..2c02bd4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -21,6 +21,7 @@
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -28,6 +29,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
+import java.util.TreeMap;
 import java.util.regex.Pattern;

 import org.apache.commons.logging.Log;
@@ -66,12 +68,10 @@
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.TezWork;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
@@ -119,7 +119,11 @@
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.util.ReflectionUtils;

 public class Vectorizer implements PhysicalPlanResolver {
@@ -280,13 +284,13 @@ public Vectorizer() {

     private PhysicalContext pctx;

-    private int keyColCount;
-    private int valueColCount;
+    private List<String> reduceColumnNames;
+    private List<TypeInfo> reduceTypeInfos;

     public VectorizationDispatcher(PhysicalContext pctx) {
       this.pctx = pctx;
-      keyColCount = 0;
-      valueColCount = 0;
+      reduceColumnNames = null;
+      reduceTypeInfos = null;
     }

     @Override
@@ -374,14 +378,13 @@ private void vectorizeMapWork(MapWork mapWork) throws SemanticException {
       HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
       ogw.startWalking(topNodes, nodeOutput);

-      Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
-      mapWork.setScratchColumnVectorTypes(columnVectorTypes);
-      Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
-      mapWork.setScratchColumnMap(columnMap);
+      Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps();
+      mapWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps);
+      Map<String, Map<String, Integer>> allColumnVectorMaps = vnp.getAllColumnVectorMaps();
+      mapWork.setAllColumnVectorMaps(allColumnVectorMaps);

       if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
-        LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+        debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps);
       }

       return;
@@ -402,7 +405,7 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws Sema
           return false;
         }
         StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
-        keyColCount = keyStructObjectInspector.getAllStructFieldRefs().size();
+        List<? extends StructField> keyFields = keyStructObjectInspector.getAllStructFieldRefs();

         // Tez doesn't use tagging...
         if (reduceWork.getNeedsTagging()) {
@@ -415,9 +418,20 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws Sema
             !(valueObjectInspector instanceof StructObjectInspector)) {
           return false;
         }
-        StructObjectInspector valueStructObjectInspector =
-            (StructObjectInspector)valueObjectInspector;
-        valueColCount = valueStructObjectInspector.getAllStructFieldRefs().size();
+        StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+        List<? extends StructField> valueFields = valueStructObjectInspector.getAllStructFieldRefs();
+
+        reduceColumnNames = new ArrayList<String>();
+        reduceTypeInfos = new ArrayList<TypeInfo>();
+
+        for (StructField field: keyFields) {
+          reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
+          reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+        }
+        for (StructField field: valueFields) {
+          reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
+          reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+        }
       } catch (Exception e) {
         throw new SemanticException(e);
       }
@@ -467,7 +481,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException
       // VectorizationContext...  Do we use PreOrderWalker instead of DefaultGraphWalker.
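getOnlyStructObjectInspectors() above now flattens the reduce-shuffle key and value fields into one column list. For a shuffle with one key column and two value columns it would produce lists along these lines (illustrative names and types only):

    reduceColumnNames = Arrays.asList("KEY.reducesinkkey0", "VALUE._col0", "VALUE._col1");
    reduceTypeInfos = Arrays.asList(
        (TypeInfo) TypeInfoFactory.longTypeInfo,
        TypeInfoFactory.stringTypeInfo,
        TypeInfoFactory.doubleTypeInfo);
    // VectorExtractOperator later keeps only the VALUE.* entries in its output
    // projection and strips the "VALUE." prefix from the names.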
       Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
       ReduceWorkVectorizationNodeProcessor vnp =
-          new ReduceWorkVectorizationNodeProcessor(reduceWork, keyColCount, valueColCount);
+          new ReduceWorkVectorizationNodeProcessor(reduceColumnNames);
       addReduceWorkRules(opRules, vnp);
       Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
       GraphWalker ogw = new PreOrderWalker(disp);
@@ -484,18 +498,17 @@ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException

       Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
       if (reducer.getType().equals(OperatorType.EXTRACT)) {
-        ((VectorExtractOperator)reducer).setKeyAndValueColCounts(keyColCount, valueColCount);
+        ((VectorExtractOperator)reducer).setReduceTypeInfos(reduceTypeInfos);
       }

-      Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
-      reduceWork.setScratchColumnVectorTypes(columnVectorTypes);
-      Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
-      reduceWork.setScratchColumnMap(columnMap);
+      Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps();
+      reduceWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps);
+      Map<String, Map<String, Integer>> allColumnVectorMaps = vnp.getAllColumnVectorMaps();
+      reduceWork.setAllColumnVectorMaps(allColumnVectorMaps);

       if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
-        LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+        debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps);
       }
     }
   }
@@ -560,26 +573,26 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,

     protected final Set<Operator<? extends OperatorDesc>> opsDone =
         new HashSet<Operator<? extends OperatorDesc>>();

-    public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
-      Map<String, Map<Integer, String>> scratchColumnVectorTypes =
+    public Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps() {
+      Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps =
           new HashMap<String, Map<Integer, String>>();
       for (String onefile : scratchColumnContext.keySet()) {
         VectorizationContext vc = scratchColumnContext.get(onefile);
-        Map<Integer, String> cmap = vc.getOutputColumnTypeMap();
-        scratchColumnVectorTypes.put(onefile, cmap);
+        Map<Integer, String> cmap = vc.getScratchColumnTypeMap();
+        allScratchColumnVectorTypeMaps.put(onefile, cmap);
       }
-      return scratchColumnVectorTypes;
+      return allScratchColumnVectorTypeMaps;
     }

-    public Map<String, Map<String, Integer>> getScratchColumnMap() {
-      Map<String, Map<String, Integer>> scratchColumnMap =
+    public Map<String, Map<String, Integer>> getAllColumnVectorMaps() {
+      Map<String, Map<String, Integer>> allColumnVectorMaps =
          new HashMap<String, Map<String, Integer>>();
       for(String oneFile: scratchColumnContext.keySet()) {
         VectorizationContext vc = scratchColumnContext.get(oneFile);
-        Map<String, Integer> cmap = vc.getColumnMap();
-        scratchColumnMap.put(oneFile, cmap);
+        Map<String, Integer> cmap = vc.getProjectionColumnMap();
+        allColumnVectorMaps.put(oneFile, cmap);
       }
-      return scratchColumnMap;
+      return allColumnVectorMaps;
     }

     public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack,
@@ -665,10 +678,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         vContext.setFileKey(onefile);
         scratchColumnContext.put(onefile, vContext);
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Vectorized MapWork operator " + op.getName() +
-              " with vectorization context key=" + vContext.getFileKey() +
-              ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
-              ", columnMap: " + vContext.getColumnMap().toString());
+          LOG.debug("Vectorized MapWork operator " + op.getName() + " vectorization context " + vContext.toString());
         }
         break;
       }
@@ -699,17 +709,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,

       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);

       if (LOG.isDebugEnabled()) {
-        LOG.debug("Vectorized MapWork operator " + vectorOp.getName() +
-            " with vectorization context key=" + vContext.getFileKey() +
-            ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
-            ", columnMap: " + vContext.getColumnMap().toString());
+        LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString());
         if (vectorOp instanceof VectorizationContextRegion) {
           VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
           VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
-          LOG.debug("Vectorized MapWork operator " + vectorOp.getName() +
-              " added new vectorization context key=" + vOutContext.getFileKey() +
-              ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() +
-              ", columnMap: " + vOutContext.getColumnMap().toString());
+          LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString());
         }
       }

@@ -719,10 +723,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,

   class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {

-    private final ReduceWork rWork;
-    private int keyColCount;
-    private int valueColCount;
-    private Map<String, Integer> reduceColumnNameMap;
+    private List<String> reduceColumnNames;

     private VectorizationContext reduceShuffleVectorizationContext;

@@ -732,12 +733,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       return rootVectorOp;
     }

-    public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount,
-        int valueColCount) {
-      this.rWork = rWork;
-      reduceColumnNameMap = rWork.getReduceColumnNameMap();
-      this.keyColCount = keyColCount;
-      this.valueColCount = valueColCount;
+    public ReduceWorkVectorizationNodeProcessor(List<String> reduceColumnNames) {
+      this.reduceColumnNames = reduceColumnNames;
       rootVectorOp = null;
       reduceShuffleVectorizationContext = null;
     }
@@ -755,17 +752,16 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       boolean saveRootVectorOp = false;

       if (op.getParentOperators().size() == 0) {
-        vContext = getReduceVectorizationContext(reduceColumnNameMap);
+        LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString());
+
+        vContext = new VectorizationContext(reduceColumnNames);
         vContext.setFileKey("_REDUCE_SHUFFLE_");
         scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext);
         reduceShuffleVectorizationContext = vContext;
         saveRootVectorOp = true;

         if (LOG.isDebugEnabled()) {
-          LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context key=" +
-              vContext.getFileKey() +
-              ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
-              ", columnMap: " + vContext.getColumnMap().toString());
+          LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString());
         }
       } else {
         vContext = walkStackToFindVectorizationContext(stack, op);
@@ -791,17 +787,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);

       if (LOG.isDebugEnabled()) {
-        LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() +
-            " with vectorization context key=" + vContext.getFileKey() +
-            ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
-            ", columnMap: " + vContext.getColumnMap().toString());
+        LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString());
         if (vectorOp instanceof VectorizationContextRegion) {
           VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
           VectorizationContext vOutContext =
               vcRegion.getOuputVectorizationContext();
-          LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() +
-              " added new vectorization context key=" + vOutContext.getFileKey() +
-              ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() +
-              ", columnMap: " + vOutContext.getColumnMap().toString());
+          LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString());
         }
       }

       if (vectorOp instanceof VectorGroupByOperator) {
@@ -819,7 +809,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,

   private static class ValidatorVectorizationContext extends VectorizationContext {
     private ValidatorVectorizationContext() {
-      super(null, -1);
+      super();
     }

     @Override
@@ -1222,21 +1212,17 @@ private VectorizationContext getVectorizationContext(Operator op,
       PhysicalContext pctx) {
     RowSchema rs = op.getSchema();

-    Map<String, Integer> cmap = new HashMap<String, Integer>();
-    int columnCount = 0;
+    // Add all non-virtual columns to make a vectorization context for
+    // the TableScan operator.
+    VectorizationContext vContext = new VectorizationContext();
     for (ColumnInfo c : rs.getSignature()) {
       // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560).
       if (!isVirtualColumn(c)) {
-        cmap.put(c.getInternalName(), columnCount++);
+        vContext.addInitialColumn(c.getInternalName());
       }
     }
-
-    return new VectorizationContext(cmap, columnCount);
-  }
-
-  private VectorizationContext getReduceVectorizationContext(
-      Map<String, Integer> reduceColumnNameMap) {
-    return new VectorizationContext(reduceColumnNameMap, reduceColumnNameMap.size());
+    vContext.finishedAddingInitialColumns();
+    return vContext;
   }

   private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
@@ -1292,4 +1278,41 @@ private boolean isVirtualColumn(ColumnInfo column) {
     }
     return false;
   }
+
+  public void debugDisplayAllMaps(Map<String, Map<String, Integer>> allColumnVectorMaps,
+      Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps) {
+
+    // Context keys grow in length since they are a path...
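getVectorizationContext() above uses the two-phase form of the new API. A small sketch of the same pattern with hypothetical column names:

    // Add the real (non-virtual) columns first, then freeze them so that the
    // OutputColumnManager starts allocating scratch columns after the last one.
    VectorizationContext vContext = new VectorizationContext();
    vContext.addInitialColumn("cint");
    vContext.addInitialColumn("cstring");
    vContext.finishedAddingInitialColumns();
    // Expression scratch columns are now allocated from index 2 upward and are
    // reported by getScratchColumnTypeMap().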
+    Comparator<String> comparerShorterString = new Comparator<String>() {
+      @Override
+      public int compare(String o1, String o2) {
+        Integer length1 = o1.length();
+        Integer length2 = o2.length();
+        return length1.compareTo(length2);
+      }};
+
+    Comparator<Integer> comparerInteger = new Comparator<Integer>() {
+      @Override
+      public int compare(Integer o1, Integer o2) {
+        return o1.compareTo(o2);
+      }};
+
+    Map<String, Map<Integer, String>> sortedAllColumnVectorMaps = new TreeMap<String, Map<Integer, String>>(comparerShorterString);
+    for (Map.Entry<String, Map<String, Integer>> entry : allColumnVectorMaps.entrySet()) {
+      Map<Integer, String> sortedColumnMap = new TreeMap<Integer, String>(comparerInteger);
+      for (Map.Entry<String, Integer> innerEntry : entry.getValue().entrySet()) {
+        sortedColumnMap.put(innerEntry.getValue(), innerEntry.getKey());
+      }
+      sortedAllColumnVectorMaps.put(entry.getKey(), sortedColumnMap);
+    }
+    LOG.debug("sortedAllColumnVectorMaps " + sortedAllColumnVectorMaps);
+
+    Map<String, Map<Integer, String>> sortedAllScratchColumnVectorTypeMap = new TreeMap<String, Map<Integer, String>>(comparerShorterString);
+    for (Map.Entry<String, Map<Integer, String>> entry : allScratchColumnVectorTypeMaps.entrySet()) {
+      Map<Integer, String> sortedScratchColumnTypeMap = new TreeMap<Integer, String>(comparerInteger);
+      sortedScratchColumnTypeMap.putAll(entry.getValue());
+      sortedAllScratchColumnVectorTypeMap.put(entry.getKey(), sortedScratchColumnTypeMap);
+    }
+    LOG.debug("sortedAllScratchColumnVectorTypeMap " + sortedAllScratchColumnVectorTypeMap);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
index 9d8d52d..a1cc90d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
@@ -56,8 +56,8 @@ public BaseWork(String name) {
   private String name;

   // Vectorization.
-  protected Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
-  protected Map<String, Map<String, Integer>> scratchColumnMap = null;
+  protected Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = null;
+  protected Map<String, Map<String, Integer>> allColumnVectorMaps = null;
   protected boolean vectorMode = false;

   public void setGatheringStats(boolean gatherStats) {
@@ -115,21 +115,21 @@ public void addDummyOp(HashTableDummyOperator dummyOp) {
     return returnSet;
   }

-  public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
-    return scratchColumnVectorTypes;
+  public Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps() {
+    return allScratchColumnVectorTypeMaps;
   }

-  public void setScratchColumnVectorTypes(
-      Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
-    this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+  public void setAllScratchColumnVectorTypeMaps(
+      Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps) {
+    this.allScratchColumnVectorTypeMaps = allScratchColumnVectorTypeMaps;
   }

-  public Map<String, Map<String, Integer>> getScratchColumnMap() {
-    return scratchColumnMap;
+  public Map<String, Map<String, Integer>> getAllColumnVectorMaps() {
+    return allColumnVectorMaps;
   }

-  public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
-    this.scratchColumnMap = scratchColumnMap;
+  public void setAllColumnVectorMaps(Map<String, Map<String, Integer>> allColumnVectorMaps) {
+    this.allColumnVectorMaps = allColumnVectorMaps;
   }

   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
index 53ba031..c78184b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
@@ -95,8 +95,6 @@ public ReduceWork(String name) {
   private ObjectInspector keyObjectInspector = null;
   private ObjectInspector valueObjectInspector = null;

-  private final Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
-
   /**
    * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
    * to keySerializeInfo of the ReduceSink
@@ -142,58 +140,6 @@ public ObjectInspector getValueObjectInspector() {
     return valueObjectInspector;
   }

-  private int addToReduceColumnNameMap(StructObjectInspector structObjectInspector, int startIndex, String prefix) {
-    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
-    int index = startIndex;
-    for (StructField field: fields) {
-      reduceColumnNameMap.put(prefix + "." + field.getFieldName(), index);
-      index++;
-    }
-    return index;
-  }
-
-  public Boolean fillInReduceColumnNameMap() {
-    ObjectInspector keyObjectInspector = getKeyObjectInspector();
-    if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
-      return false;
-    }
-    StructObjectInspector keyStructObjectInspector = (StructObjectInspector) keyObjectInspector;
-
-    ObjectInspector valueObjectInspector = getValueObjectInspector();
-    if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
-      return false;
-    }
-    StructObjectInspector valueStructObjectInspector = (StructObjectInspector) valueObjectInspector;
-
-    int keyCount = addToReduceColumnNameMap(keyStructObjectInspector, 0, Utilities.ReduceField.KEY.toString());
-    addToReduceColumnNameMap(valueStructObjectInspector, keyCount, Utilities.ReduceField.VALUE.toString());
-    return true;
-  }
-
-  public Map<String, Integer> getReduceColumnNameMap() {
-    if (needsTagging) {
-      return null;
-    }
-    if (reduceColumnNameMap.size() == 0) {
-      if (!fillInReduceColumnNameMap()) {
-        return null;
-      }
-    }
-    return reduceColumnNameMap;
-  }
-
-  public List<String> getReduceColumnNames() {
-    if (needsTagging) {
-      return null;
-    }
-    if (reduceColumnNameMap.size() == 0) {
-      if (!fillInReduceColumnNameMap()) {
-        return null;
-      }
-    }
-    return new ArrayList<String>(reduceColumnNameMap.keySet());
-  }
-
   public List<TableDesc> getTagToValueDesc() {
     return tagToValueDesc;
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
index 24a52a6..27c1a84 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
@@ -18,7 +18,9 @@
 package org.apache.hadoop.hive.ql.exec.vector;

+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 import junit.framework.Assert;
@@ -83,9 +85,9 @@ private LongColumnVector getLongVector(int len) {

   private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
     ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
-    Map<String, Integer> columnMap = new HashMap<String, Integer>();
-    columnMap.put("col1", 1);
-    VectorizationContext vc = new VectorizationContext(columnMap, 1);
+    List<String> columns = new ArrayList<String>();
+    columns.add("col1");
+    VectorizationContext vc = new VectorizationContext(columns);
     FilterDesc fdesc = new FilterDesc();
     fdesc.setPredicate(col1Expr);
     return new VectorFilterOperator(vc, fdesc);
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index 525aa99..c147483 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -170,10 +170,10 @@ private static GroupByDesc buildKeyGroupByDesc(

   @Test
   public void testMemoryPressureFlush() throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("Key", 0);
-    mapColumnNames.put("Value", 1);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("Key");
+    mapColumnNames.add("Value");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildKeyGroupByDesc (ctx, "max",
         "Value", TypeInfoFactory.longTypeInfo,
@@ -1710,7 +1710,7 @@ private void testMultiKey(
       mapColumnNames.put("value", i);
       outputColumnNames.add("value");

-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, i+1);
+    VectorizationContext ctx = new VectorizationContext(outputColumnNames);

     ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(1);
     aggs.add(
@@ -1818,10 +1818,10 @@ private void testKeyTypeAggregate(
       FakeVectorRowBatchFromObjectIterables data,
       Map<Object, Object> expected) throws HiveException {

-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("Key", 0);
-    mapColumnNames.put("Value", 1);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("Key");
+    mapColumnNames.add("Value");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);
     Set<Object> keys = new HashSet<Object>();

     AggregationDesc agg = buildAggregationDesc(ctx, aggregateName,
@@ -2233,9 +2233,9 @@ public static Validator getValidator(String aggregate) throws HiveException {
   public void testAggregateCountStarIterable (
       Iterable<VectorizedRowBatch> data,
       Object expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("A", 0);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("A");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildGroupByDescCountStar (ctx);
@@ -2262,9 +2262,9 @@ public void testAggregateCountStarIterable (
   public void testAggregateCountReduceIterable (
       Iterable<VectorizedRowBatch> data,
       Object expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("A", 0);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("A");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo);
     VectorGroupByDesc vectorDesc = desc.getVectorDesc();
@@ -2294,9 +2294,9 @@ public void testAggregateStringIterable (
       String aggregateName,
       Iterable<VectorizedRowBatch> data,
       Object expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("A", 0);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("A");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A",
         TypeInfoFactory.stringTypeInfo);
@@ -2325,9 +2325,9 @@ public void testAggregateDecimalIterable (
       String aggregateName,
       Iterable<VectorizedRowBatch> data,
       Object expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("A", 0);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("A");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A",
         TypeInfoFactory.getDecimalTypeInfo(30, 4));
@@ -2357,9 +2357,9 @@ public void testAggregateDoubleIterable (
       String aggregateName,
       Iterable<VectorizedRowBatch> data,
       Object expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("A", 0);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("A");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A",
         TypeInfoFactory.doubleTypeInfo);
@@ -2388,9 +2388,9 @@ public void testAggregateLongIterable (
       String aggregateName,
       Iterable<VectorizedRowBatch> data,
       Object expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("A", 0);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("A");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);

     GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A",
         TypeInfoFactory.longTypeInfo);
@@ -2418,10 +2418,11 @@ public void testAggregateLongKeyIterable (
       String aggregateName,
       Iterable<VectorizedRowBatch> data,
       HashMap<Object, Object> expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("Key", 0);
-    mapColumnNames.put("Value", 1);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("Key");
+    mapColumnNames.add("Value");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);
+
     Set<Object> keys = new HashSet<Object>();

     GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
@@ -2484,10 +2485,10 @@ public void testAggregateStringKeyIterable (
       Iterable<VectorizedRowBatch> data,
       TypeInfo dataTypeInfo,
       HashMap<Object, Object> expected) throws HiveException {
-    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
-    mapColumnNames.put("Key", 0);
-    mapColumnNames.put("Value", 1);
-    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+    List<String> mapColumnNames = new ArrayList<String>();
+    mapColumnNames.add("Key");
+    mapColumnNames.add("Value");
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames);
     Set<Object> keys = new HashSet<Object>();

     GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
index 486089b..3c004a1 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
@@ -84,9 +84,11 @@ public void forward(Object row, ObjectInspector rowInspector) throws HiveExcepti

   @Test
   public void testSelectOperator() throws HiveException {
-    Map<String, Integer> columnMap = new HashMap<String, Integer>();
-    columnMap.put("a", 0); columnMap.put("b", 1); columnMap.put("c", 2);
-    VectorizationContext vc = new VectorizationContext(columnMap, 3);
+    List<String> columns = new ArrayList<String>();
+    columns.add("a");
+    columns.add("b");
+    columns.add("c");
+    VectorizationContext vc = new VectorizationContext(columns);

     SelectDesc selDesc = new SelectDesc(false);
     List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 9e3628f..efe2efe 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -247,16 +247,16 @@ public void testArithmeticExpressionVectorization() throws HiveException {
     children5.add(col6Expr);
modExpr.setChildren(children5); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - columnMap.put("col3", 3); - columnMap.put("col4", 4); - columnMap.put("col5", 5); - columnMap.put("col6", 6); + VectorizationContext vc = new VectorizationContext(); + vc.addInitialColumn("col1"); + vc.addInitialColumn("col2"); + vc.addInitialColumn("col3"); + vc.addInitialColumn("col4"); + vc.addInitialColumn("col5"); + vc.addInitialColumn("col6"); + vc.finishedAddingInitialColumns(); //Generate vectorized expression - VectorizationContext vc = new VectorizationContext(columnMap, 6); VectorExpression ve = vc.getVectorExpression(sumExpr, VectorExpressionDescriptor.Mode.PROJECTION); //Verify vectorized expression @@ -293,11 +293,11 @@ public void testStringFilterExpressions() throws HiveException { children1.add(constDesc); exprDesc.setChildren(children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -318,11 +318,11 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -341,7 +341,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -360,7 +360,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -378,7 +378,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -395,7 +395,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -412,7 +412,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -432,10 +432,9 @@ public 
void testFloatInExpressions() throws HiveException { ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, udf, children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col1"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -477,11 +476,11 @@ public void testVectorizeFilterAndOrExpression() throws HiveException { children3.add(lessExprDesc); andExprDesc.setChildren(children3); - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - columnMap.put("col2", 1); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -528,11 +527,10 @@ public void testVectorizeAndOrProjectionExpression() throws HiveException { children3.add(col2Expr); andExprDesc.setChildren(children3); - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - columnMap.put("col2", 1); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER); assertEquals(veAnd.getClass(), FilterExprAndExpr.class); assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); @@ -557,7 +555,7 @@ public void testVectorizeAndOrProjectionExpression() throws HiveException { orExprDesc.setChildren(children4); //Allocate new Vectorization context to reset the intermediate columns. 
- vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER); assertEquals(veOr.getClass(), FilterExprOrExpr.class); assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); @@ -594,11 +592,11 @@ public void testNotExpression() throws HiveException { childOfNot.add(greaterExprDesc); notExpr.setChildren(childOfNot); - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - columnMap.put("col2", 1); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(notExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -632,11 +630,10 @@ public void testNullExpressions() throws HiveException { childOfIsNull.add(greaterExprDesc); isNullExpr.setChildren(childOfIsNull); - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - columnMap.put("col2", 1); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(isNullExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -674,11 +671,10 @@ public void testNotNullExpressions() throws HiveException { childOfNot.add(greaterExprDesc); isNotNullExpr.setChildren(childOfNot); - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - columnMap.put("col2", 1); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(isNotNullExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -705,10 +701,9 @@ public void testVectorizeScalarColumnExpression() throws HiveException { ExprNodeGenericFuncDesc scalarMinusConstant = new ExprNodeGenericFuncDesc(TypeInfoFactory.longTypeInfo, gudf, children); - Map columnMap = new HashMap(); - columnMap.put("a", 0); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(scalarMinusConstant, VectorExpressionDescriptor.Mode.PROJECTION); assertEquals(ve.getClass(), LongScalarSubtractLongColumn.class); @@ -727,11 +722,11 @@ public void testFilterWithNegativeScalar() throws HiveException { children1.add(constDesc); exprDesc.setChildren(children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -746,9 +741,10 @@ public void testUnaryMinusColumnLong() throws HiveException { children.add(col1Expr); ExprNodeGenericFuncDesc negExprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.longTypeInfo, gudf, children); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - VectorizationContext vc = new 
VectorizationContext(columnMap, 1); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(negExprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -763,9 +759,10 @@ public void testUnaryMinusColumnDouble() throws HiveException { children.add(col1Expr); ExprNodeGenericFuncDesc negExprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, gudf, children); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - VectorizationContext vc = new VectorizationContext(columnMap, 1); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(negExprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -788,10 +785,9 @@ public void testFilterScalarCompareColumn() throws HiveException { scalarGreaterColExpr.setChildren(children); - Map columnMap = new HashMap(); - columnMap.put("a", 0); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(scalarGreaterColExpr, VectorExpressionDescriptor.Mode.FILTER); assertEquals(FilterLongScalarGreaterLongColumn.class, ve.getClass()); } @@ -812,10 +808,9 @@ public void testFilterBooleanColumnCompareBooleanScalar() throws HiveException { colEqualScalar.setChildren(children); - Map columnMap = new HashMap(); - columnMap.put("a", 0); - - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(colEqualScalar, VectorExpressionDescriptor.Mode.FILTER); assertEquals(FilterLongColEqualLongScalar.class, ve.getClass()); } @@ -836,9 +831,9 @@ public void testBooleanColumnCompareBooleanScalar() throws HiveException { colEqualScalar.setChildren(children); - Map columnMap = new HashMap(); - columnMap.put("a", 0); - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(colEqualScalar, VectorExpressionDescriptor.Mode.PROJECTION); assertEquals(LongColEqualLongScalar.class, ve.getClass()); } @@ -852,11 +847,10 @@ public void testUnaryStringExpressions() throws HiveException { children.add(colDesc); stringUnary.setChildren(children); - Map columnMap = new HashMap(); - columnMap.put("b", 0); - columnMap.put("a", 1); - VectorizationContext vc = new VectorizationContext(columnMap, 2); - + List columns = new ArrayList(); + columns.add("b"); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); GenericUDF stringLower = new GenericUDFLower(); stringUnary.setGenericUDF(stringLower); @@ -866,7 +860,7 @@ public void testUnaryStringExpressions() throws HiveException { assertEquals(1, ((StringLower) ve).getColNum()); assertEquals(2, ((StringLower) ve).getOutputColumn()); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ExprNodeGenericFuncDesc anotherUnary = new ExprNodeGenericFuncDesc(); anotherUnary.setTypeInfo(TypeInfoFactory.stringTypeInfo); @@ -898,10 +892,10 @@ public void testMathFunctions() throws HiveException { 
children1.add(colDesc1); children2.add(colDesc2); - Map columnMap = new HashMap(); - columnMap.put("b", 0); - columnMap.put("a", 1); - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("b"); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); // Sin(double) GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName()); @@ -989,10 +983,10 @@ public void testTimeStampUdfs() throws HiveException { List children = new ArrayList(); children.add(colDesc1); - Map columnMap = new HashMap(); - columnMap.put("b", 0); - columnMap.put("a", 1); - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("b"); + columns.add("a"); + VectorizationContext vc = new VectorizationContext(columns); //UDFYear GenericUDFBridge gudfBridge = new GenericUDFBridge("year", false, UDFYear.class.getName()); @@ -1026,10 +1020,12 @@ public void testBetweenFilters() throws HiveException { ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); + VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnBetween); @@ -1054,7 +1050,7 @@ public void testBetweenFilters() throws HiveException { exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterCharColumnBetween); @@ -1079,7 +1075,7 @@ public void testBetweenFilters() throws HiveException { exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - vc = new VectorizationContext(columnMap, 2); + vc = new VectorizationContext(columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterVarCharColumnBetween); @@ -1144,10 +1140,11 @@ public void testInFiltersAndExprs() throws HiveException { ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - VectorizationContext vc = new VectorizationContext(columnMap, 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnInList); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -1197,11 +1194,12 @@ public void testIfConditionalExprs() throws HiveException { ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - Map columnMap = new HashMap(); - columnMap.put("col1", 1); - columnMap.put("col2", 2); - columnMap.put("col3", 3); - VectorizationContext vc = new VectorizationContext(columnMap, 3); + List 
columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + columns.add("col3"); + VectorizationContext vc = new VectorizationContext(columns); VectorExpression ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongColumnLongColumn); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index a15a7a7..3635fc7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -1298,8 +1298,8 @@ JobConf createMockExecutionEnvironment(Path workDir, } mapWork.setPathToAliases(aliasMap); mapWork.setPathToPartitionInfo(partMap); - mapWork.setScratchColumnMap(new HashMap>()); - mapWork.setScratchColumnVectorTypes(new HashMap>()); + mapWork.setAllScratchColumnVectorTypeMaps(new HashMap>()); // write the plan out diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index fbab3af..3271189 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -45,13 +45,14 @@ @Before public void setUp() { - Map columnMap = new HashMap(); - columnMap.put("col1", 0); - columnMap.put("col2", 1); - columnMap.put("col3", 2); + List columns = new ArrayList(); + columns.add("col0"); + columns.add("col1"); + columns.add("col2"); + columns.add("col3"); //Generate vectorized expression - vContext = new VectorizationContext(columnMap, 3); + vContext = new VectorizationContext(columns); } @Description(name = "fake", value = "FAKE") diff --git ql/src/test/queries/clientpositive/vector_bucket.q ql/src/test/queries/clientpositive/vector_bucket.q new file mode 100644 index 0000000..19a09c4 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_bucket.q @@ -0,0 +1,11 @@ +SET hive.vectorized.execution.enabled=true; +set hive.support.concurrency=true; +set hive.enforce.bucketing=true; + +CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile; + + +explain +insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three'); select a, b from non_orc_table order by a; + +insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three'); select a, b from non_orc_table order by a; diff --git ql/src/test/results/clientpositive/tez/vector_bucket.q.out ql/src/test/results/clientpositive/tez/vector_bucket.q.out new file mode 100644 index 0000000..e4deb04 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_bucket.q.out @@ -0,0 +1,105 @@ +PREHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@non_orc_table +POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_orc_table +PREHOOK: query: explain +insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: values__tmp__table__1 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: tmp_values_col1 (type: string), tmp_values_col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.non_orc_table + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.non_orc_table + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: select a, b from non_orc_table order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +POSTHOOK: query: select a, b from non_orc_table order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +PREHOOK: query: insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@non_orc_table +POSTHOOK: query: insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@non_orc_table +POSTHOOK: Lineage: non_orc_table.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: non_orc_table.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select a, b from non_orc_table order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +POSTHOOK: query: select a, b from non_orc_table order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +1 one +1 one +2 two +3 three diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out index 2e66485..59f872a 100644 --- 
ql/src/test/results/clientpositive/tez/vector_char_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -110,6 +110,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Select Operator @@ -241,6 +242,7 @@ STAGE PLANS: sort order: - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/vector_bucket.q.out ql/src/test/results/clientpositive/vector_bucket.q.out new file mode 100644 index 0000000..952c033 --- /dev/null +++ ql/src/test/results/clientpositive/vector_bucket.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@non_orc_table +POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_orc_table +PREHOOK: query: explain +insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__1 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: tmp_values_col1 (type: string), tmp_values_col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.non_orc_table + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.non_orc_table + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: select a, b from non_orc_table order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@non_orc_table +#### A masked 
pattern was here #### +POSTHOOK: query: select a, b from non_orc_table order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +PREHOOK: query: insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@non_orc_table +POSTHOOK: query: insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@non_orc_table +POSTHOOK: Lineage: non_orc_table.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: non_orc_table.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select a, b from non_orc_table order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +POSTHOOK: query: select a, b from non_orc_table order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_orc_table +#### A masked pattern was here #### +1 one +1 one +2 two +3 three
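
The test changes quoted above all perform the same mechanical migration: a VectorizationContext built from a name-to-index map plus an explicit column count is rebuilt from an ordered list of column names, so each column's index is implied by its position in the list. Below is a minimal sketch of the two construction styles the updated tests exercise; the class and method names (VectorizationContext, addInitialColumn, finishedAddingInitialColumns) are taken from the patch itself, the generic type parameters dropped in the quoted diff are assumed to be String, and the wrapper class and method names around them are illustrative only.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;

public class VectorizationContextConstructionSketch {

  // Style 1: hand the ordered column names to the constructor;
  // "col1" gets index 0, "col2" gets index 1, and so on.
  static VectorizationContext fromColumnList() {
    List<String> columns = new ArrayList<String>();
    columns.add("col1");
    columns.add("col2");
    return new VectorizationContext(columns);
  }

  // Style 2: incremental form, as exercised in
  // TestVectorizationContext.testArithmeticExpressionVectorization above.
  static VectorizationContext fromIncrementalAdds() {
    VectorizationContext vc = new VectorizationContext();
    vc.addInitialColumn("col1");
    vc.addInitialColumn("col2");
    vc.finishedAddingInitialColumns();
    return vc;
  }
}

Either form replaces the pre-patch pattern of populating a Map of column name to index and passing it to the constructor together with an initial column count.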