diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
index d153fd6..20b92de 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
@@ -85,6 +85,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private Object[] partialResult;
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt
index 46d66bd..7468c2f 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt
@@ -77,6 +77,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   private transient VectorExpressionWriter resultWriter;
 
   public <ClassName>(VectorExpression inputExpression) {
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt
index 9a48171..6b91fc2 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt
@@ -83,6 +83,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   private transient VectorExpressionWriter resultWriter;
 
   public <ClassName>(VectorExpression inputExpression) {
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt
index 3cdf7e2..749e97e 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt
@@ -81,6 +81,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   private transient VectorExpressionWriter resultWriter;
 
   public <ClassName>(VectorExpression inputExpression) {
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt
index cdce457..9dfc147 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt
@@ -93,6 +93,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private Text result;
 
   public <ClassName>(VectorExpression inputExpression) {
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt
index 7e34965..32ecb34 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt
@@ -83,6 +83,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   private transient VectorExpressionWriter resultWriter;
 
   public <ClassName>(VectorExpression inputExpression) {
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt
index cc7e54d..bd0f14d 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt
@@ -78,6 +78,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private final result;
 
   public <ClassName>(VectorExpression inputExpression) {
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt
index c6c9c52..dc9d4b1 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt
@@ -84,6 +84,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
   transient private DoubleWritable resultVariance;
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt
index 8fc94ba..01062a9 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt
@@ -111,6 +111,12 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
   transient private DoubleWritable resultVariance;
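
Each of these templates is expanded by the build into one concrete aggregate class per type (VectorUDAFMinLong, VectorUDAFMaxDouble, and so on), so the one-line accessor above lands in every generated class. A minimal, hand-written sketch of what one expansion looks like after substitution (hypothetical SketchUDAFMinLong name, trimmed to only the members this patch touches):

import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

// Hypothetical expansion of a UDAF template with <ClassName> = SketchUDAFMinLong.
// The real generated classes extend VectorAggregateExpression, which (later in
// this patch) declares inputExpression() abstract so EXPLAIN VECTORIZATION can
// describe any aggregator uniformly.
public class SketchUDAFMinLong {

  private VectorExpression inputExpression;

  // The accessor added by this patch.
  public VectorExpression inputExpression() {
    return inputExpression;
  }

  public SketchUDAFMinLong(VectorExpression inputExpression) {
    this.inputExpression = inputExpression;
  }
}
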
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index b8a4693..646992f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -20,6 +20,7 @@
 
 import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
 
+import org.apache.commons.lang3.tuple.ImmutablePair;
 import java.io.OutputStream;
 import java.io.PrintStream;
 import java.io.Serializable;
@@ -35,30 +36,67 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Stack;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.common.jsonexplain.JsonParser;
 import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.Validator.StringSet;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
+import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExplainWork;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo;
+import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory;
 import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hive.common.util.AnnotationUtils;
@@ -157,6 +195,54 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E
     return outJSONObject;
   }
 
+  private static String trueCondNameVectorizationEnabled =
+      HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS true";
+  private static String falseCondNameVectorizationEnabled =
+      HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS false";
+
+  private ImmutablePair<Boolean, JSONObject> outputPlanVectorization(PrintStream out, boolean jsonOutput)
+      throws Exception {
+
+    if (out != null) {
+      out.println("PLAN VECTORIZATION:");
+    }
+
+    JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null;
+
+    HiveConf hiveConf = queryState.getConf();
+
+    boolean isVectorizationEnabled = HiveConf.getBoolVar(hiveConf,
+        HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+    String isVectorizationEnabledCondName =
+        (isVectorizationEnabled ?
+            trueCondNameVectorizationEnabled :
+            falseCondNameVectorizationEnabled);
+    List<String> isVectorizationEnabledCondList = Arrays.asList(isVectorizationEnabledCondName);
+
+    if (out != null) {
+      out.print(indentString(2));
+      out.print("enabled: ");
+      out.println(isVectorizationEnabled);
+      out.print(indentString(2));
+      if (!isVectorizationEnabled) {
+        out.print("enabledConditionsNotMet: ");
+      } else {
+        out.print("enabledConditionsMet: ");
+      }
+      out.println(isVectorizationEnabledCondList);
+    }
+    if (jsonOutput) {
+      json.put("enabled", isVectorizationEnabled);
+      if (!isVectorizationEnabled) {
+        json.put("enabledConditionsNotMet", isVectorizationEnabledCondList);
+      } else {
+        json.put("enabledConditionsMet", isVectorizationEnabledCondList);
+      }
+    }
+
+    return new ImmutablePair<Boolean, JSONObject>(isVectorizationEnabled, jsonOutput ? json : null);
+  }
+
   public JSONObject getJSONPlan(PrintStream out, ExplainWork work) throws Exception {
     return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(),
@@ -184,26 +270,46 @@ public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetc
       ordered.add(fetchTask);
     }
 
-    JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
+    boolean suppressOthersForVectorization = false;
+    if (this.work != null && this.work.isVectorization()) {
+      ImmutablePair<Boolean, JSONObject> planVecPair = outputPlanVectorization(out, jsonOutput);
+
+      if (this.work.isVectorizationOnly()) {
+        // Suppress the STAGES if vectorization is off.
+        suppressOthersForVectorization = !planVecPair.left;
+      }
 
-    if (out != null) {
-      out.println();
+      if (out != null) {
+        out.println();
+      }
+
+      if (jsonOutput) {
+        outJSONObject.put("PLAN VECTORIZATION", planVecPair.right);
+      }
     }
 
-    if (jsonOutput) {
-      outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies);
-    }
+    if (!suppressOthersForVectorization) {
+      JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
 
-    // Go over all the tasks and dump out the plans
-    JSONObject jsonPlan = outputStagePlans(out, ordered,
-        jsonOutput, isExtended);
+      if (out != null) {
+        out.println();
+      }
 
-    if (jsonOutput) {
-      outJSONObject.put("STAGE PLANS", jsonPlan);
-    }
+      if (jsonOutput) {
+        outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies);
+      }
 
-    if (fetchTask != null) {
-      fetchTask.setParentTasks(null);
+      // Go over all the tasks and dump out the plans
+      JSONObject jsonPlan = outputStagePlans(out, ordered,
+          jsonOutput, isExtended);
+
+      if (jsonOutput) {
+        outJSONObject.put("STAGE PLANS", jsonPlan);
+      }
+
+      if (fetchTask != null) {
+        fetchTask.setParentTasks(null);
+      }
     }
 
     return jsonOutput ? outJSONObject : null;
@@ -602,6 +708,22 @@ private JSONObject outputPlan(Object work, PrintStream out,
         }
       }
       if (invokeFlag) {
+        Vectorization vectorization = xpl_note.vectorization();
+        if (this.work != null && this.work.isVectorization()) {
+          if (vectorization == Vectorization.NON_VECTORIZED) {
+            if (this.work.isVectorizationOnly()) {
+              invokeFlag = false;
+            }
+          } else {
+            if (vectorization == Vectorization.DETAIL && !this.work.isVectorizationDetail()) {
+              invokeFlag = false;
+            }
+          }
+        } else if (vectorization != Vectorization.NON_VECTORIZED && vectorization != Vectorization.PATH) {
+          invokeFlag = false;
+        }
+      }
+      if (invokeFlag) {
         keyJSONObject = xpl_note.displayName();
         if (out != null) {
           out.print(indentString(indent));
@@ -675,6 +797,23 @@ private JSONObject outputPlan(Object work, PrintStream out,
         }
       }
       if (invokeFlag) {
+        Vectorization vectorization = xpl_note.vectorization();
+        if (this.work != null && this.work.isVectorization()) {
+          if (vectorization == Vectorization.NON_VECTORIZED) {
+            if (this.work.isVectorizationOnly()) {
+              invokeFlag = false;
+            }
+          } else {
+            if (vectorization == Vectorization.DETAIL && !this.work.isVectorizationDetail()) {
+              invokeFlag = false;
+            }
+          }
+        } else if (vectorization != Vectorization.NON_VECTORIZED && vectorization != Vectorization.PATH) {
+          // Don't show vectorization related objects.
+          invokeFlag = false;
+        }
+      }
+      if (invokeFlag) {
         Object val = null;
         try {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index 416606e..c070c4a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -93,7 +93,7 @@
   protected transient ReusableGetAdaptor[] hashMapRowGetters;
 
   private UnwrapRowContainer[] unwrapContainer;
-  private transient Configuration hconf;
+  protected transient Configuration hconf;
   private transient boolean hybridMapJoinLeftover;  // whether there's spilled data to be processed
   protected transient MapJoinBytesTableContainer[] spilledMapJoinTables;  // used to hold restored
                                                                           // spilled small tables
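
For reference, outputPlanVectorization() above renders the text flavor of the new section roughly as follows (illustrative output, assuming hive.vectorized.execution.enabled=true; the condition string is built from HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname and indentString(2) supplies the two-space indent):

PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

With EXPLAIN VECTORIZATION ONLY and vectorization disabled, getJSONPlan() sets suppressOthersForVectorization and the STAGE DEPENDENCIES / STAGE PLANS sections are omitted entirely.
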
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
index 038b96c..af1fa66 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
@@ -41,6 +41,8 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc;
 import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator;
+import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
+import org.apache.hadoop.hive.ql.plan.AbstractVectorDesc;
 import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
 import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc;
@@ -73,6 +75,7 @@
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
 
 import com.google.common.base.Preconditions;
 
@@ -142,6 +145,8 @@
       Class<? extends Operator<?>> opClass, CompilationOpContext cContext, T conf,
           VectorizationContext vContext) throws HiveException {
     try {
+      VectorDesc vectorDesc = ((AbstractOperatorDesc) conf).getVectorDesc();
+      vectorDesc.setVectorOp(opClass);
       Operator<T> op = (Operator<T>) opClass.getDeclaredConstructor(
           CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class)
           .newInstance(cContext, vContext, conf);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
index 9049ddd..42c7d36 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
@@ -201,5 +201,4 @@ public boolean isIdentitySelect() {
     return true;
   }
 
-
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
index 9f27f56..cbe83be 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
@@ -93,9 +93,7 @@ public static boolean isSupportedField(ObjectInspector foi) {
     return true;
   }
 
-  public static boolean isSupportedField(String typeName) {
-    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
-
+  public static boolean isSupportedField(TypeInfo typeInfo) {
     if (typeInfo.getCategory() != Category.PRIMITIVE) return false; // not supported
     PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
     PrimitiveCategory pc = primitiveTypeInfo.getPrimitiveCategory();
@@ -103,6 +101,11 @@ public static boolean isSupportedField(String typeName) {
     return true;
   }
 
+  public static boolean isSupportedField(String typeName) {
+    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+    return isSupportedField(typeInfo);
+  }
+
   public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] keyObject,
       List<ObjectInspector> keyOIs, boolean mayReuseKey) throws HiveException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
index 1634f42..3cf6561 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
@@ -75,7 +75,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, Configurati
     this.desc = joinOp.getConf();
     if (desc.getVectorMode() && HiveConf.getBoolVar(
         hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
-      VectorMapJoinDesc vectorDesc = desc.getVectorDesc();
+      VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
       useFastContainer = vectorDesc != null && vectorDesc.hashTableImplementationType() ==
           VectorMapJoinDesc.HashTableImplementationType.FAST;
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java
index c4b95c3..c890674 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java
@@ -20,6 +20,8 @@
 
 import java.util.Arrays;
 
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
 /**
  * This class collects column information for copying a row from one VectorizedRowBatch to
  * same/another batch.
@@ -30,7 +32,7 @@
   protected int[] sourceColumns;
   protected int[] outputColumns;
-  protected String[] typeNames;
+  protected TypeInfo[] typeInfos;
 
   protected VectorColumnOrderedMap vectorColumnMapping;
 
@@ -38,7 +40,7 @@ public VectorColumnMapping(String name) {
     this.vectorColumnMapping = new VectorColumnOrderedMap(name);
   }
 
-  public abstract void add(int sourceColumn, int outputColumn, String typeName);
+  public abstract void add(int sourceColumn, int outputColumn, TypeInfo typeInfo);
 
   public abstract void finalize();
 
@@ -54,8 +56,8 @@ public int getCount() {
     return outputColumns;
   }
 
-  public String[] getTypeNames() {
-    return typeNames;
+  public TypeInfo[] getTypeInfos() {
+    return typeInfos;
   }
 
   @Override
@@ -65,7 +67,7 @@ public String toString() {
     sb.append(", ");
     sb.append("output columns: " + Arrays.toString(outputColumns));
     sb.append(", ");
-    sb.append("type names: " + Arrays.toString(typeNames));
+    sb.append("type infos: " + Arrays.toString(typeInfos));
     return sb.toString();
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java
index 0e6014b..97d55f5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java
@@ -23,8 +23,10 @@
 import java.util.TreeMap;
 
 import org.apache.commons.lang.ArrayUtils;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for mapping vector columns, including the hive type name.
@@ -43,17 +45,17 @@
 
   private class Value {
     int valueColumn;
-    String typeName;
+    TypeInfo typeInfo;
 
-    Value(int valueColumn, String typeName) {
+    Value(int valueColumn, TypeInfo typeInfo) {
       this.valueColumn = valueColumn;
-      this.typeName = typeName;
+      this.typeInfo = typeInfo;
     }
 
     public String toString() {
       StringBuilder sb = new StringBuilder();
       sb.append("(value column: " + valueColumn);
-      sb.append(", type name: " + typeName + ")");
+      sb.append(", type info: " + typeInfo.toString() + ")");
       return sb.toString();
     }
   }
@@ -62,12 +64,12 @@ public String toString() {
 
     private final int[] orderedColumns;
     private final int[] valueColumns;
-    private final String[] typeNames;
+    private final TypeInfo[] typeInfos;
 
-    Mapping(int[] orderedColumns, int[] valueColumns, String[] typeNames) {
+    Mapping(int[] orderedColumns, int[] valueColumns, TypeInfo[] typeInfos) {
       this.orderedColumns = orderedColumns;
       this.valueColumns = valueColumns;
-      this.typeNames = typeNames;
+      this.typeInfos = typeInfos;
     }
 
     public int getCount() {
@@ -82,8 +84,8 @@ public int getCount() {
       return valueColumns;
     }
 
-    public String[] getTypeNames() {
-      return typeNames;
+    public TypeInfo[] getTypeInfos() {
+      return typeInfos;
     }
   }
 
@@ -92,14 +94,14 @@ public VectorColumnOrderedMap(String name) {
     orderedTreeMap = new TreeMap<Integer, Value>();
   }
 
-  public void add(int orderedColumn, int valueColumn, String typeName) {
+  public void add(int orderedColumn, int valueColumn, TypeInfo typeInfo) {
     if (orderedTreeMap.containsKey(orderedColumn)) {
       throw new RuntimeException(
           name + " duplicate column " + orderedColumn +
           " in ordered column map " + orderedTreeMap.toString() +
-          " when adding value column " + valueColumn + ", type " + typeName);
+          " when adding value column " + valueColumn + ", type info " + typeInfo.toString());
     }
-    orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeName));
+    orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeInfo));
   }
 
   public boolean orderedColumnsContain(int orderedColumn) {
@@ -109,17 +111,16 @@ public boolean orderedColumnsContain(int orderedColumn) {
   public Mapping getMapping() {
     ArrayList<Integer> orderedColumns = new ArrayList<Integer>();
     ArrayList<Integer> valueColumns = new ArrayList<Integer>();
-    ArrayList<String> typeNames = new ArrayList<String>();
+    ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
     for (Map.Entry<Integer, Value> entry : orderedTreeMap.entrySet()) {
       orderedColumns.add(entry.getKey());
       Value value = entry.getValue();
       valueColumns.add(value.valueColumn);
-      typeNames.add(value.typeName);
+      typeInfos.add(value.typeInfo);
     }
     return new Mapping(
         ArrayUtils.toPrimitive(orderedColumns.toArray(new Integer[0])),
         ArrayUtils.toPrimitive(valueColumns.toArray(new Integer[0])),
-        typeNames.toArray(new String[0]));
-
+        typeInfos.toArray(new TypeInfo[0]));
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java
index f35aff7..4ceff6b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one VectorizedRowBatch to
@@ -35,9 +36,9 @@ public VectorColumnOutputMapping(String name) {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
     // Order on outputColumn.
-    vectorColumnMapping.add(outputColumn, sourceColumn, typeName);
+    vectorColumnMapping.add(outputColumn, sourceColumn, typeInfo);
   }
 
   public boolean containsOutputColumn(int outputColumn) {
@@ -51,7 +52,7 @@ public void finalize() {
     // Ordered columns are the output columns.
     sourceColumns = mapping.getValueColumns();
     outputColumns = mapping.getOrderedColumns();
-    typeNames = mapping.getTypeNames();
+    typeInfos = mapping.getTypeInfos();
 
     // Not needed anymore.
     vectorColumnMapping = null;
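
The TypeInfo-based add() API above works the same way on both mapping flavors; a minimal usage sketch of the ordered-map behavior (hypothetical column numbers, assuming the class and its Mapping inner class are publicly accessible as the diff suggests):

import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap;
import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class OrderedMapSketch {
  public static void main(String[] args) {
    VectorColumnOrderedMap map = new VectorColumnOrderedMap("Sketch");
    // Entries may be added in any order; getMapping() returns them sorted
    // by the ordered column, so the three arrays line up pairwise.
    map.add(5, 1, TypeInfoFactory.longTypeInfo);
    map.add(2, 0, TypeInfoFactory.stringTypeInfo);
    Mapping mapping = map.getMapping();
    // orderedColumns = [2, 5], valueColumns = [0, 1], typeInfos = [string, bigint]
    System.out.println(java.util.Arrays.toString(mapping.getOrderedColumns()));
  }
}
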
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
index 4f5ba9a..061e396 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one VectorizedRowBatch to
@@ -35,9 +36,9 @@ public VectorColumnSourceMapping(String name) {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
     // Order on sourceColumn.
-    vectorColumnMapping.add(sourceColumn, outputColumn, typeName);
+    vectorColumnMapping.add(sourceColumn, outputColumn, typeInfo);
   }
 
   @Override
@@ -47,7 +48,7 @@ public void finalize() {
     // Ordered columns are the source columns.
     sourceColumns = mapping.getOrderedColumns();
     outputColumns = mapping.getValueColumns();
-    typeNames = mapping.getTypeNames();
+    typeInfos = mapping.getTypeInfos();
 
     // Not needed anymore.
     vectorColumnMapping = null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index c8e0284..911aeb0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -262,8 +262,7 @@ public void init(VectorColumnMapping columnMapping) throws HiveException {
     for (int i = 0; i < count; i++) {
       int inputColumn = columnMapping.getInputColumns()[i];
       int outputColumn = columnMapping.getOutputColumns()[i];
-      String typeName = columnMapping.getTypeNames()[i].toLowerCase();
-      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      TypeInfo typeInfo = columnMapping.getTypeInfos()[i];
       Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
       CopyRow copyRowByValue = null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 261246b..bfe22b0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -50,9 +51,8 @@ public VectorFilterOperator(CompilationOpContext ctx,
       VectorizationContext vContext, OperatorDesc conf) throws HiveException {
     this(ctx);
-    ExprNodeDesc oldExpression = ((FilterDesc) conf).getPredicate();
-    conditionEvaluator = vContext.getVectorExpression(oldExpression, VectorExpressionDescriptor.Mode.FILTER);
     this.conf = (FilterDesc) conf;
+    conditionEvaluator = ((VectorFilterDesc) this.conf.getVectorDesc()).getPredicateExpression();
   }
 
   /** Kryo ctor. */
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 2605203..fef7c2a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -41,6 +41,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
@@ -65,6 +66,8 @@
   private static final Logger LOG = LoggerFactory.getLogger(
       VectorGroupByOperator.class.getName());
 
+  private VectorGroupByDesc vectorDesc;
+
   /**
    * This is the vector of aggregators. They are stateless and only implement
    * the algorithm of how to compute the aggregation. state is kept in the
@@ -756,16 +759,10 @@ public VectorGroupByOperator(CompilationOpContext ctx,
     this(ctx);
     GroupByDesc desc = (GroupByDesc) conf;
     this.conf = desc;
-    List<ExprNodeDesc> keysDesc = desc.getKeys();
-    keyExpressions = vContext.getVectorExpressions(keysDesc);
-    ArrayList<AggregationDesc> aggrDesc = desc.getAggregators();
-    aggregators = new VectorAggregateExpression[aggrDesc.size()];
-    for (int i = 0; i < aggrDesc.size(); ++i) {
-      AggregationDesc aggDesc = aggrDesc.get(i);
-      aggregators[i] = vContext.getAggregatorExpression(aggDesc);
-    }
-
-    isVectorOutput = desc.getVectorDesc().isVectorOutput();
+    vectorDesc = (VectorGroupByDesc) desc.getVectorDesc();
+    keyExpressions = vectorDesc.getKeyExpressions();
+    aggregators = vectorDesc.getAggregators();
+    isVectorOutput = vectorDesc.isVectorOutput();
 
     vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
         /* vContextEnvironment */ vContext);
@@ -834,7 +831,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
 
     forwardCache = new Object[outputKeyLength + aggregators.length];
 
-    switch (conf.getVectorDesc().getProcessingMode()) {
+    switch (vectorDesc.getProcessingMode()) {
     case GLOBAL:
       Preconditions.checkState(outputKeyLength == 0);
       processingMode = this.new ProcessingModeGlobalAggregate();
@@ -850,7 +847,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       break;
     default:
       throw new RuntimeException("Unsupported vector GROUP BY processing mode " +
-          conf.getVectorDesc().getProcessingMode().name());
+          vectorDesc.getProcessingMode().name());
     }
     processingMode.initialize(hconf);
   }
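
The constructor changes in VectorFilterOperator and VectorGroupByOperator above, and in VectorSelectOperator below, all follow the same split: the Vectorizer now prebuilds the vector expressions during planning and stores them on a class-specific VectorDesc, so operator constructors only cast and read. A minimal sketch of that shape (hypothetical Sketch* class names, not the actual Hive ones):

import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

// Planning time: the optimizer records what it validated and created.
class SketchVectorSelectDesc {
  private VectorExpression[] selectExpressions;

  public void setSelectExpressions(VectorExpression[] selectExpressions) {
    this.selectExpressions = selectExpressions;
  }

  public VectorExpression[] getSelectExpressions() {
    return selectExpressions;
  }
}

// Construction time: no vContext.getVectorExpression(...) calls remain;
// the operator simply reads back the precomputed expressions.
class SketchVectorSelectOperator {
  private final VectorExpression[] vExpressions;

  SketchVectorSelectOperator(SketchVectorSelectDesc vectorDesc) {
    this.vExpressions = vectorDesc.getSelectExpressions();
  }
}
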
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
index f7fec8f..235b82d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.VectorSelectDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -45,6 +46,8 @@
 
   private static final long serialVersionUID = 1L;
 
+  private VectorSelectDesc vectorDesc;
+
   protected VectorExpression[] vExpressions = null;
 
   private transient int [] projectedColumns = null;
@@ -58,13 +61,8 @@ public VectorSelectOperator(CompilationOpContext ctx,
       VectorizationContext vContext, OperatorDesc conf) throws HiveException {
     this(ctx);
     this.conf = (SelectDesc) conf;
-    List<ExprNodeDesc> colList = this.conf.getColList();
-    vExpressions = new VectorExpression[colList.size()];
-    for (int i = 0; i < colList.size(); i++) {
-      ExprNodeDesc expr = colList.get(i);
-      VectorExpression ve = vContext.getVectorExpression(expr);
-      vExpressions[i] = ve;
-    }
+    vectorDesc = (VectorSelectDesc) this.conf.getVectorDesc();
+    vExpressions = vectorDesc.getSelectExpressions();
 
     /**
      * Create a new vectorization context to create a new projection, but keep
@@ -73,6 +71,7 @@ public VectorSelectOperator(CompilationOpContext ctx,
     vOutContext = new VectorizationContext(getName(), vContext);
 
     vOutContext.resetProjectionColumns();
+    List<ExprNodeDesc> colList = this.conf.getColList();
     for (int i=0; i < colList.size(); ++i) {
       String columnName = this.conf.getOutputColumnNames().get(i);
       VectorExpression ve = vExpressions[i];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index f088941..66b11fb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -560,7 +560,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress
         } else {
           throw new HiveException(
               "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
-                + " because hive.vectorized.adaptor.usage.mode=chosen "
+                + " because hive.vectorized.adaptor.usage.mode=chosen"
                 + " and the UDF wasn't one of the chosen ones");
         }
         break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
index a403725..914bb1f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
@@ -20,10 +20,10 @@
 
 /**
  * VectorizationContextRegion optional interface implemented by vectorized operators
- * that are changing the vectorizaiton context (region boundary operators)
+ * that are changing the vectorization context (region boundary operators)
 */
 public interface VectorizationContextRegion {
 
   VectorizationContext getOuputVectorizationContext();
-}
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java
index 96e62cf..9217149 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java
@@ -22,6 +22,7 @@
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -55,7 +56,21 @@ public abstract void aggregateInputSelection(VectorAggregationBufferRow[] aggreg
   public boolean hasVariableSize() {
     return false;
   }
 
+  public abstract VectorExpression inputExpression();
+
   public abstract void init(AggregationDesc desc) throws HiveException;
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(this.getClass().getSimpleName());
+    VectorExpression inputExpression = inputExpression();
+    if (inputExpression != null) {
+      sb.append("(");
+      sb.append(inputExpression.toString());
+      sb.append(")");
+    }
+    return sb.toString();
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
index d0ff5fa..05b76c7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
@@ -114,6 +114,12 @@ public void reset() {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private Object[] partialResult;
   transient private LongWritable resultCount;
   transient private HiveDecimalWritable resultSum;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java
index d0a1d0d..483d9dc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java
@@ -84,6 +84,12 @@ public void reset () {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private Object[] partialResult;
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
index cf373a1..494febc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
@@ -60,6 +60,12 @@ public void reset() {
   }
 
   private VectorExpression inputExpression = null;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private final LongWritable result;
 
   public VectorUDAFCount(VectorExpression inputExpression) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java
index 577977f..dec88cb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java
@@ -61,6 +61,12 @@ public void reset() {
   }
 
   private VectorExpression inputExpression = null;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private final LongWritable result;
 
   public VectorUDAFCountMerge(VectorExpression inputExpression) {
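
VectorUDAFCountStar below is the one aggregate whose inputExpression() returns null, since COUNT(*) reads no column; the base-class toString() added earlier in this patch handles that by printing only the class name. A self-contained sketch of that contract (hypothetical mini classes, with a plain String standing in for VectorExpression):

public class AggToStringSketch {
  abstract static class AggSketch {
    abstract String inputExpression();  // stand-in for VectorExpression

    @Override
    public String toString() {
      String input = inputExpression();
      // Same shape as VectorAggregateExpression.toString():
      // "Class(input)" with an input, bare "Class" without one.
      return getClass().getSimpleName() + (input != null ? "(" + input + ")" : "");
    }
  }

  static class CountLike extends AggSketch {
    String inputExpression() { return "col 2"; }
  }

  static class CountStarLike extends AggSketch {
    String inputExpression() { return null; }  // COUNT(*) has no input
  }

  public static void main(String[] args) {
    System.out.println(new CountLike());      // prints: CountLike(col 2)
    System.out.println(new CountStarLike());  // prints: CountStarLike
  }
}
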
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java
index 72beda8..337ba0a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java
@@ -57,6 +57,13 @@ public void reset() {
     }
   }
 
+
+  @Override
+  public VectorExpression inputExpression() {
+    // None.
+    return null;
+  }
+
   transient private final LongWritable result;
 
   public VectorUDAFCountStar(VectorExpression inputExpression) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java
index fa25e6a..8cd3506 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdPopTimestamp.java
@@ -83,6 +83,12 @@ public void reset () {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
   transient private DoubleWritable resultVariance;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java
index b3e1fae..61d6977 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFStdSampTimestamp.java
@@ -83,6 +83,12 @@ public void reset () {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
   transient private DoubleWritable resultVariance;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java
index 3a5fef6..8a71ce3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal.java
@@ -88,6 +88,12 @@ public void reset() {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private final HiveDecimalWritable scratchDecimal;
 
   public VectorUDAFSumDecimal(VectorExpression inputExpression) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java
index 970ec22..2709b07 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarPopTimestamp.java
@@ -83,6 +83,12 @@ public void reset () {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
   transient private DoubleWritable resultVariance;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java
index 9af1a28..03dce1e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFVarSampTimestamp.java
@@ -83,6 +83,12 @@ public void reset () {
   }
 
   private VectorExpression inputExpression;
+
+  @Override
+  public VectorExpression inputExpression() {
+    return inputExpression;
+  }
+
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
   transient private DoubleWritable resultVariance;
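
VectorMapJoinCommonOperator below replaces the old transient taskName field with a logging prefix that is built lazily, because the map/reduce work name is only known once the operator has a Configuration. A minimal sketch of that lazy-init idea on its own (hypothetical names; contextName() stands in for the Utilities.getMapWork/getReduceWork lookups):

// Sketch: compute a context-dependent log prefix once, on first use.
class LoggingPrefixSketch {

  private transient String loggingPrefix;  // null until first use

  private String contextName() {
    // Stand-in for resolving the enclosing map or reduce task name.
    return "Map 1";
  }

  String getLoggingPrefix(String className) {
    if (loggingPrefix == null) {
      loggingPrefix = className + " " + contextName();
    }
    return loggingPrefix;
  }
}
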
protected VectorizationContext vOutContext; - // The output column projection of the vectorized row batch. And, the type names of the output + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; - protected String[] outputTypeNames; + protected TypeInfo[] outputTypeInfos; // These are the vectorized batch expressions for filtering, key expressions, and value // expressions. @@ -101,15 +141,17 @@ // This is map of which vectorized row batch columns are the big table key columns. Since // we may have key expressions that produce new scratch columns, we need a mapping. - // And, we have their type names. + // And, we have their type infos. protected int[] bigTableKeyColumnMap; - protected ArrayList bigTableKeyTypeNames; + protected String[] bigTableKeyColumnNames; + protected TypeInfo[] bigTableKeyTypeInfos; // Similarly, this is map of which vectorized row batch columns are the big table value columns. // Since we may have value expressions that produce new scratch columns, we need a mapping. - // And, we have their type names. + // And, we have their type infos. protected int[] bigTableValueColumnMap; - protected ArrayList bigTableValueTypeNames; + protected String[] bigTableValueColumnNames; + protected TypeInfo[] bigTableValueTypeInfos; // This is a mapping of which big table columns (input and key/value expressions) will be // part of the big table portion of the join output result. @@ -124,6 +166,8 @@ // to output batch scratch columns for the small table portion. protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping projectionMapping; + // These are the output columns for the small table and the outer small table keys. protected int[] smallTableOutputVectorColumns; protected int[] bigTableOuterKeyOutputVectorColumns; @@ -137,9 +181,6 @@ // transient. //--------------------------------------------------------------------------- - // For debug tracing: the name of the map or reduce task. - protected transient String taskName; - // The threshold where we should use a repeating vectorized row batch optimization for // generating join output results. protected transient boolean useOverflowRepeatedThreshold; @@ -192,6 +233,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, MapJoinDesc desc = (MapJoinDesc) conf; this.conf = desc; + vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); + vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo(); + Preconditions.checkState(vectorMapJoinInfo != null); this.vContext = vContext; @@ -210,214 +254,28 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); - List keyDesc = desc.getKeys().get(posBigTable); - bigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); - - // Since a key expression can be a calculation and the key will go into a scratch column, - // we need the mapping and type information. 
- bigTableKeyColumnMap = new int[bigTableKeyExpressions.length]; - bigTableKeyTypeNames = new ArrayList(); - boolean onlyColumns = true; - for (int i = 0; i < bigTableKeyColumnMap.length; i++) { - VectorExpression ve = bigTableKeyExpressions[i]; - if (!IdentityExpression.isColumnOnly(ve)) { - onlyColumns = false; - } - bigTableKeyTypeNames.add(keyDesc.get(i).getTypeString()); - bigTableKeyColumnMap[i] = ve.getOutputColumn(); - } - if (onlyColumns) { - bigTableKeyExpressions = null; - } - - List bigTableExprs = desc.getExprs().get(posBigTable); - bigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); - - /* - * Similarly, we need a mapping since a value expression can be a calculation and the value - * will go into a scratch column. - */ - bigTableValueColumnMap = new int[bigTableValueExpressions.length]; - bigTableValueTypeNames = new ArrayList(); - onlyColumns = true; - for (int i = 0; i < bigTableValueColumnMap.length; i++) { - VectorExpression ve = bigTableValueExpressions[i]; - if (!IdentityExpression.isColumnOnly(ve)) { - onlyColumns = false; - } - bigTableValueTypeNames.add(bigTableExprs.get(i).getTypeString()); - bigTableValueColumnMap[i] = ve.getOutputColumn(); - } - if (onlyColumns) { - bigTableValueExpressions = null; - } - - determineCommonInfo(isOuterJoin); - } - - protected void determineCommonInfo(boolean isOuter) throws HiveException { - - bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping"); - - bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping"); - - // The order of the fields in the LazyBinary small table value must be used, so - // we use the source ordering flavor for the mapping. - smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping"); - - // We use a mapping object here so we can build the projection in any order and - // get the ordered by 0 to n-1 output columns at the end. - // - // Also, to avoid copying a big table key into the small table result area for inner joins, - // we reference it with the projection so there can be duplicate output columns - // in the projection. - VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); - - /* - * Gather up big and small table output result information from the MapJoinDesc. - */ - List bigTableRetainList = conf.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); - - int[] smallTableIndices; - int smallTableIndicesSize; - List smallTableExprs = conf.getExprs().get(posSingleVectorMapJoinSmallTable); - if (conf.getValueIndices() != null && conf.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { - smallTableIndices = conf.getValueIndices().get(posSingleVectorMapJoinSmallTable); - smallTableIndicesSize = smallTableIndices.length; - } else { - smallTableIndices = null; - smallTableIndicesSize = 0; - } - - List smallTableRetainList = conf.getRetainList().get(posSingleVectorMapJoinSmallTable); - int smallTableRetainSize = smallTableRetainList.size(); - - int smallTableResultSize = 0; - if (smallTableIndicesSize > 0) { - smallTableResultSize = smallTableIndicesSize; - } else if (smallTableRetainSize > 0) { - smallTableResultSize = smallTableRetainSize; - } - - /* - * Determine the big table retained mapping first so we can optimize out (with - * projection) copying inner join big table keys in the subsequent small table results section. - */ - int nextOutputColumn = (order[0] == posBigTable ? 
0 : smallTableResultSize); - for (int i = 0; i < bigTableRetainSize; i++) { - - // Since bigTableValueExpressions may do a calculation and produce a scratch column, we - // need to map to the right batch column. - - int retainColumn = bigTableRetainList.get(i); - int batchColumnIndex = bigTableValueColumnMap[retainColumn]; - String typeName = bigTableValueTypeNames.get(i); - - // With this map we project the big table batch to make it look like an output batch. - projectionMapping.add(nextOutputColumn, batchColumnIndex, typeName); - - // Collect columns we copy from the big table batch to the overflow batch. - if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { - // Tolerate repeated use of a big table column. - bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeName); - } - - nextOutputColumn++; - } - - /* - * Now determine the small table results. - */ - int firstSmallTableOutputColumn; - firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0); - int smallTableOutputCount = 0; - nextOutputColumn = firstSmallTableOutputColumn; - - // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - if (smallTableIndicesSize > 0) { - smallTableOutputCount = smallTableIndicesSize; - - for (int i = 0; i < smallTableIndicesSize; i++) { - if (smallTableIndices[i] >= 0) { - - // Zero and above numbers indicate a big table key is needed for - // small table result "area". - - int keyIndex = smallTableIndices[i]; - - // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we - // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - String typeName = bigTableKeyTypeNames.get(keyIndex); - - if (!isOuter) { - - // Optimize inner join keys of small table results. - - // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeName); - - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". - bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeName); - } - } else { - - // For outer joins, since the small table key can be null when there is no match, - // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. - - int scratchColumn = vOutContext.allocateScratchColumn(typeName); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); - - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeName); + bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); + bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); + bigTableKeyTypeInfos = vectorMapJoinInfo.getBigTableKeyTypeInfos(); + bigTableKeyExpressions = vectorMapJoinInfo.getBigTableKeyExpressions(); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeName); - } - } else { + bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); + bigTableValueColumnNames = vectorMapJoinInfo.getBigTableValueColumnNames(); + bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); + bigTableValueExpressions = vectorMapJoinInfo.getBigTableValueExpressions(); - // Negative numbers indicate a column to be (deserialize) read from the small table's - // LazyBinary value row. 
- int smallTableValueIndex = -smallTableIndices[i] - 1; + bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); - String typeName = smallTableExprs.get(i).getTypeString(); + bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); - // Make a new big table scratch column for the small table value. - int scratchColumn = vOutContext.allocateScratchColumn(typeName); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); + smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeName); - } - nextOutputColumn++; - } - } else if (smallTableRetainSize > 0) { - smallTableOutputCount = smallTableRetainSize; - - // Only small table values appear in join output result. - - for (int i = 0; i < smallTableRetainSize; i++) { - int smallTableValueIndex = smallTableRetainList.get(i); - - // Make a new big table scratch column for the small table value. - String typeName = smallTableExprs.get(i).getTypeString(); - int scratchColumn = vOutContext.allocateScratchColumn(typeName); + projectionMapping = vectorMapJoinInfo.getProjectionMapping(); - projectionMapping.add(nextOutputColumn, scratchColumn, typeName); - - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeName); - nextOutputColumn++; - } - } - - // Convert dynamic arrays and maps to simple arrays. - - bigTableRetainedMapping.finalize(); - - bigTableOuterKeyMapping.finalize(); + determineCommonInfo(isOuterJoin); + } - smallTableMapping.finalize(); + protected void determineCommonInfo(boolean isOuter) throws HiveException { bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); @@ -429,46 +287,37 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); - projectionMapping.finalize(); - - // Verify we added an entry for each output. 
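
The replacement lines above capture the heart of this refactor: the operator constructor no longer derives key/value column maps, type infos, and output mappings itself; it copies them from a VectorMapJoinInfo that the Vectorizer evidently builds at planning time. A stand-in sketch of that ownership split (these are not Hive's actual classes, just the shape of the change):

    import java.util.Arrays;

    // Planning side computes everything once...
    class JoinPlanInfoSketch {
      final int[] bigTableKeyColumnMap;
      final int[] bigTableValueColumnMap;
      JoinPlanInfoSketch(int[] keyMap, int[] valueMap) {
        this.bigTableKeyColumnMap = keyMap;
        this.bigTableValueColumnMap = valueMap;
      }
    }

    // ...and the runtime operator just receives the results.
    class JoinOperatorSketch {
      private final int[] bigTableKeyColumnMap;
      private final int[] bigTableValueColumnMap;
      JoinOperatorSketch(JoinPlanInfoSketch info) {
        // No planning logic runs in the constructor anymore.
        this.bigTableKeyColumnMap = info.bigTableKeyColumnMap;
        this.bigTableValueColumnMap = info.bigTableValueColumnMap;
      }
      public static void main(String[] args) {
        JoinOperatorSketch op =
            new JoinOperatorSketch(new JoinPlanInfoSketch(new int[] {0, 1}, new int[] {2}));
        System.out.println(Arrays.toString(op.bigTableKeyColumnMap));
      }
    }
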
- assert projectionMapping.isSourceSequenceGood(); - outputProjection = projectionMapping.getOutputColumns(); - outputTypeNames = projectionMapping.getTypeNames(); + outputTypeInfos = projectionMapping.getTypeInfos(); if (isLogDebugEnabled) { int[] orderDisplayable = new int[order.length]; for (int i = 0; i < order.length; i++) { orderDisplayable[i] = (int) order[i]; } - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); - - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeNames " + bigTableKeyTypeNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + bigTableValueTypeNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableIndices " + Arrays.toString(smallTableIndices)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableRetainList " + smallTableRetainList); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeInfos " + Arrays.toString(bigTableValueTypeInfos)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor firstSmallTableOutputColumn " + firstSmallTableOutputColumn); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableOutputCount " + smallTableOutputCount); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); + LOG.debug(getLoggingPrefix() + " 
VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -482,10 +331,10 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { ArrayList list = new ArrayList(); int count = mapping.getCount(); int[] outputColumns = mapping.getOutputColumns(); - String[] typeNames = mapping.getTypeNames(); + TypeInfo[] typeInfos = mapping.getTypeInfos(); for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; - String typeName = typeNames[i]; + String typeName = typeInfos[i].getTypeName(); if (VectorizationContext.isStringFamily(typeName)) { list.add(outputColumn); } @@ -500,10 +349,10 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { */ protected void setupVOutContext(List outputColumnNames) { if (isLogDebugEnabled) { - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); } if (outputColumnNames.size() != outputProjection.length) { - throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeNames) + " length mismatch"); + throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); } vOutContext.resetProjectionColumns(); for (int i = 0; i < outputColumnNames.size(); ++i) { @@ -512,7 +361,7 @@ protected void setupVOutContext(List outputColumnNames) { vOutContext.addProjectionColumn(columnName, outputColumn); if (isLogDebugEnabled) { - LOG.debug(taskName + ", 
" + getOperatorId() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); } } } @@ -522,7 +371,7 @@ protected void setupVOutContext(List outputColumnNames) { */ @Override protected HashTableLoader getHashTableLoader(Configuration hconf) { - VectorMapJoinDesc vectorDesc = conf.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); HashTableLoader hashTableLoader; switch (vectorDesc.hashTableImplementationType()) { @@ -546,15 +395,6 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - if (isLogDebugEnabled) { - // Determine the name of our map or reduce task for debug tracing. - BaseWork work = Utilities.getMapWork(hconf); - if (work == null) { - work = Utilities.getReduceWork(hconf); - } - taskName = work.getName(); - } - /* * Get configuration parameters. */ @@ -570,9 +410,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { smallTableVectorDeserializeRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - VectorizedBatchUtil.typeInfosFromTypeNames( - smallTableMapping.getTypeNames()), - /* useExternalBuffer */ true)); + smallTableMapping.getTypeInfos(), + /* useExternalBuffer */ true)); smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); } @@ -596,13 +435,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (isLogDebugEnabled) { int[] currentScratchColumns = vOutContext.currentScratchColumns(); - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); StructObjectInspector structOutputObjectInspector = (StructObjectInspector) outputObjInspector; List fields = structOutputObjectInspector.getAllStructFieldRefs(); int i = 0; for (StructField field : fields) { - LOG.debug("VectorMapJoinInnerBigOnlyCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); i++; } } @@ -613,7 +452,7 @@ protected void completeInitializationOp(Object[] os) throws HiveException { // setup mapJoinTables and serdes super.completeInitializationOp(os); - VectorMapJoinDesc vectorDesc = conf.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); switch (vectorDesc.hashTableImplementationType()) { case OPTIMIZED: @@ -655,7 +494,7 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { // First, just allocate just the projection columns we will be using. 
for (int i = 0; i < outputProjection.length; i++) { int outputColumn = outputProjection[i]; - String typeName = outputTypeNames[i]; + String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } @@ -687,7 +526,7 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, overflowBatch.cols[outputColumn] = VectorizedBatchUtil.createColumnVector(typeInfo); if (isLogDebugEnabled) { - LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); } } } @@ -724,9 +563,9 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { } protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { - LOG.debug("commonSetup " + batchName + " column count " + batch.numCols); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { - LOG.debug("commonSetup " + batchName + " column " + column + " type " + (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName())); + LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column " + column + " type " + (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName())); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 0bba141..43f3951 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index 621804b..95fb0c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import 
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an inner join on Multi-Key * and only big table columns appear in the join result so a hash multi-set is used. @@ -48,8 +50,17 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -114,7 +125,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index 10e75ab..044e3e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -46,8 +46,17 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerBigOnlyGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerBigOnlyStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 804d69c..c85e1d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -44,8 +44,17 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static 
final String CLASS_NAME = VectorMapJoinInnerLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index fcfa0bd..a108cd0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -39,6 +39,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an inner join on a Multi-Key * using a hash map. @@ -46,8 +48,17 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -112,7 +123,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index 0f9baae..3211d7d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinInnerStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 1149a9d..b02e6fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index e0baebc..36b8f3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an left semi join on Multi-Key * using hash set. 
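
The CLASS_NAME/getLoggingPrefix boilerplate repeats verbatim in every specialized operator, and along the way it fixes copy-paste slips like the one visible just above, where VectorMapJoinLeftSemiLongOperator logged under VectorMapJoinInnerBigOnlyLongOperator's name. The pattern, reduced to essentials (the base-class method signature is inferred from the calls in this patch; the prefix composition inside it is an assumption):

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    abstract class OperatorBaseSketch {
      private String loggingPrefix;
      protected String getLoggingPrefix(String className) {
        if (loggingPrefix == null) {
          loggingPrefix = className + " " + hashCode();  // assumed composition
        }
        return loggingPrefix;
      }
    }

    class LeftSemiLongOperatorSketch extends OperatorBaseSketch {
      // Each subclass pins its own name once, so the logger and the
      // logging prefix can never drift apart again.
      private static final String CLASS_NAME = LeftSemiLongOperatorSketch.class.getName();
      private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);

      protected String getLoggingPrefix() {
        return super.getLoggingPrefix(CLASS_NAME);
      }
    }
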
@@ -47,8 +49,17 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -113,7 +124,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 49e1177..0b3de0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -46,8 +46,17 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinInnerBigOnlyLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinLeftSemiStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index 58bd0ab..72309e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -45,8 +45,17 @@ */ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterLongOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterLongOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // 
(none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 7f9afd2..a4fc7d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -40,6 +40,8 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import com.google.common.base.Preconditions; + /* * Specialized class for doing a vectorized map join that is an outer join on Multi-Key * using a hash map. @@ -47,8 +49,17 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterMultiKeyOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterMultiKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) @@ -112,7 +123,7 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite = new VectorSerializeRow( new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeNames, bigTableKeyColumnMap); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); currentKeyOutput = new Output(); saveKeyOutput = new Output(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 8ed1ed4..6e7e5cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -45,8 +45,17 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerateResultOperator { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOuterStringOperator.class.getName()); + + //------------------------------------------------------------------------------------------------ + private static final String CLASS_NAME = VectorMapJoinOuterStringOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //------------------------------------------------------------------------------------------------ // (none) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 9f3b107..069cc9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -94,7 +94,7 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private VectorMapJoinFastHashTable createHashTable(int newThreshold) { boolean isOuterJoin = !desc.isNoOuterJoin(); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.hashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f34b1cd..111a6d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -40,7 +40,7 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, ReusableGetAdaptor hashMapRowGetter = mapJoinTableContainer.createGetter(refKey); boolean isOuterJoin = !desc.isNoOuterJoin(); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableKind hashTableKind = vectorDesc.hashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); boolean minMaxEnabled = vectorDesc.minMaxEnabled(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java index 8133aef..42ca4b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java @@ -160,7 +160,7 @@ public VectorReduceSinkCommonOperator(CompilationOpContext ctx, ReduceSinkDesc desc = (ReduceSinkDesc) conf; this.conf = desc; - vectorDesc = desc.getVectorDesc(); + vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc(); vectorReduceSinkInfo = vectorDesc.getVectorReduceSinkInfo(); this.vContext = vContext; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 3a179a3..41a85b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -43,6 +43,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -62,7 +64,11 @@ import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator; +import 
org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; @@ -73,6 +79,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -91,18 +98,34 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.LimitDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc; +import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorHashTableSinkDesc; +import org.apache.hadoop.hive.ql.plan.VectorLimitDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; +import org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; @@ -117,10 +140,13 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import 
org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAsin; @@ -170,6 +196,9 @@ import org.apache.hadoop.hive.serde2.NullStructSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -182,11 +211,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hive.common.util.ReflectionUtil; import com.google.common.base.Preconditions; public class Vectorizer implements PhysicalPlanResolver { protected static transient final Logger LOG = LoggerFactory.getLogger(Vectorizer.class); static Pattern supportedDataTypesPattern; @@ -234,12 +266,35 @@ private boolean isSpark; - boolean useVectorizedInputFileFormat; - boolean useVectorDeserialize; - boolean useRowDeserialize; + private boolean useVectorizedInputFileFormat; + private boolean useVectorDeserialize; + private boolean useRowDeserialize; + private boolean isReduceVectorizationEnabled; boolean isSchemaEvolution; + private BaseWork currentBaseWork; + private Operator currentOperator; + + private void setNodeIssue(String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createNodeIssue(issue)); + } + + private void setOperatorIssue(String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createOperatorIssue(currentOperator, issue)); + } + + private void setExpressionIssue(String expressionTitle, String issue) { + currentBaseWork.setNotVectorizedReason( + VectorizerReason.createExpressionIssue(currentOperator, expressionTitle, issue)); + } + + private void clearNotVectorizedReason() { + currentBaseWork.setNotVectorizedReason(null); + } + public Vectorizer() { supportedGenericUDFs.add(GenericUDFOPPlus.class); @@ -369,6 +424,10 @@ public Vectorizer() { int partitionColumnCount; boolean useVectorizedInputFileFormat; + boolean groupByVectorOutput; + boolean allNative; + boolean usesVectorUDFAdaptor; + String[] scratchTypeNameArray; Set> nonVectorizedOps; @@ -379,6 +438,12 @@ public Vectorizer() { partitionColumnCount = 0; } + public void assume() { + groupByVectorOutput = true; + allNative = true; + usesVectorUDFAdaptor = false; + } + public void setAllColumnNames(List allColumnNames) { this.allColumnNames = allColumnNames; } @@ -394,9 +459,19 @@ public void setPartitionColumnCount(int partitionColumnCount) { public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } + public void setGroupByVectorOutput(boolean groupByVectorOutput) { + this.groupByVectorOutput = groupByVectorOutput; + } + public void setAllNative(boolean allNative) { + this.allNative = allNative; + } + public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { + this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; + } public void setUseVectorizedInputFileFormat(boolean 
useVectorizedInputFileFormat) { this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; } + public void setNonVectorizedOps(Set> nonVectorizedOps) { this.nonVectorizedOps = nonVectorizedOps; } @@ -428,10 +503,29 @@ public void transferToBaseWork(BaseWork baseWork) { scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); - baseWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); + if (baseWork instanceof MapWork) { + MapWork mapWork = (MapWork) baseWork; + mapWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); + } + + baseWork.setAllNative(allNative); + baseWork.setGroupByVectorOutput(groupByVectorOutput); + baseWork.setUsesVectorUDFAdaptor(usesVectorUDFAdaptor); } } + private static void addMapWorkRules(Map opRules, NodeProcessor np) { + opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" + + FileSinkOperator.getOperatorName()), np); + opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*" + + ReduceSinkOperator.getOperatorName()), np); + } + + private static void addReduceWorkRules(Map opRules, NodeProcessor np) { + opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np); + opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np); + } + class VectorizationDispatcher implements Dispatcher { private final PhysicalContext physicalContext; @@ -445,17 +539,29 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { Task currTask = (Task) nd; if (currTask instanceof MapRedTask) { - convertMapWork(((MapRedTask) currTask).getWork().getMapWork(), false); + MapredWork mapredWork = ((MapRedTask) currTask).getWork(); + convertMapWork(mapredWork.getMapWork(), false); + ReduceWork reduceWork = mapredWork.getReduceWork(); + if (reduceWork != null) { + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + // We do not vectorize MR Reduce. + } } else if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); - for (BaseWork w: work.getAllWork()) { - if (w instanceof MapWork) { - convertMapWork((MapWork) w, true); - } else if (w instanceof ReduceWork) { - // We are only vectorizing Reduce under Tez. - if (HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) w, true); + for (BaseWork baseWork: work.getAllWork()) { + if (baseWork instanceof MapWork) { + convertMapWork((MapWork) baseWork, true); + } else if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + + // Always set the EXPLAIN conditions. + setReduceWorkExplainConditions(reduceWork); + + // We are only vectorizing Reduce under Tez/Spark. + if (isReduceVectorizationEnabled) { + convertReduceWork(reduceWork); } } } @@ -464,31 +570,48 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) for (BaseWork baseWork : sparkWork.getAllWork()) { if (baseWork instanceof MapWork) { convertMapWork((MapWork) baseWork, false); - } else if (baseWork instanceof ReduceWork - && HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) baseWork, false); + } else if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + + // Always set the EXPLAIN conditions. 
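
A recurring move in this dispatcher: EXPLAIN-facing state is recorded on every work unit examined, even when the unit will not be vectorized, so EXPLAIN VECTORIZATION can report why. A stand-in sketch of the idea (not Hive's actual types; the config names in the comments are the ones this patch toggles):

    class ReduceWorkSketch {
      boolean vectorizationExamined;
      boolean reduceVectorizationEnabled;
      String vectorReduceEngine;
    }

    class DispatcherSketch {
      void setReduceWorkExplainConditions(ReduceWorkSketch w, boolean enabled, String engine) {
        w.vectorizationExamined = true;          // recorded even if we bail out later
        w.reduceVectorizationEnabled = enabled;  // e.g. hive.vectorized.execution.reduce.enabled
        w.vectorReduceEngine = engine;           // e.g. "mr", "tez", "spark"
      }

      public static void main(String[] args) {
        ReduceWorkSketch w = new ReduceWorkSketch();
        new DispatcherSketch().setReduceWorkExplainConditions(w, false, "mr");
        // MR reduce is never vectorized, yet EXPLAIN still sees the conditions.
        System.out.println(w.vectorizationExamined + " " + w.reduceVectorizationEnabled);
      }
    }
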
+ setReduceWorkExplainConditions(reduceWork); + + if (isReduceVectorizationEnabled) { + convertReduceWork(reduceWork); + } } } + } else if (currTask instanceof FetchTask) { + // Set information so EXPLAIN can show conditions not met: Supported IS false. + FetchWork fetchWork = (FetchWork) currTask.getWork(); + fetchWork.setVectorizationExamined(true); } return null; } private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException { + + mapWork.setVectorizationExamined(true); + + // Global used when setting errors, etc. + currentBaseWork = mapWork; + VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); + vectorTaskColumnInfo.assume(); + boolean ret = validateMapWork(mapWork, vectorTaskColumnInfo, isTez); if (ret) { vectorizeMapWork(mapWork, vectorTaskColumnInfo, isTez); + } else if (currentBaseWork.getVectorizationEnabled()) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + LOG.info("Cannot vectorize: unknown"); + } else { + LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); + } } } - private void addMapWorkRules(Map opRules, NodeProcessor np) { - opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" - + FileSinkOperator.getOperatorName()), np); - opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*" - + ReduceSinkOperator.getOperatorName()), np); - } - /* * Determine if there is only one TableScanOperator. Currently in Map vectorization, we do not * try to vectorize multiple input trees. @@ -499,6 +622,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) LinkedHashMap> aliasToWork = mapWork.getAliasToWork(); if ((aliasToWork == null) || (aliasToWork.size() == 0)) { + setNodeIssue("Vectorized map work requires work"); return null; } int tableScanCount = 0; @@ -507,7 +631,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) for (Entry> entry : aliasToWork.entrySet()) { Operator op = entry.getValue(); if (op == null) { - LOG.warn("Map work has invalid aliases to work with. Fail validation!"); + setNodeIssue("Vectorized map work requires a valid alias"); return null; } if (op instanceof TableScanOperator) { @@ -517,7 +641,7 @@ private void addMapWorkRules(Map opRules, NodeProcessor np) } } if (tableScanCount > 1) { - LOG.warn("Map work has more than 1 TableScanOperator. Fail validation!"); + setNodeIssue("Vectorized map work only works with 1 TableScanOperator"); return null; } return new ImmutablePair(alias, tableScanOperator); @@ -558,22 +682,6 @@ private void determineDataColumnNums(TableScanOperator tableScanOperator, } } - private String getHiveOptionsString() { - StringBuilder sb = new StringBuilder(); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); - sb.append("="); - sb.append(useVectorizedInputFileFormat); - sb.append(", "); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); - sb.append("="); - sb.append(useVectorDeserialize); - sb.append(", and "); - sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); - sb.append("="); - sb.append(useRowDeserialize); - return sb.toString(); - } - /* * There are 3 modes of reading for vectorization: * @@ -588,44 +696,58 @@ private String getHiveOptionsString() { * the row object into the VectorizedRowBatch with VectorAssignRow. * This picks up Input File Format not supported by the other two. 
*/ - private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable) { + private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable, + HashSet inputFileFormatClassNameSet, HashSet enabledConditionsMetSet, + ArrayList enabledConditionsNotMetList) { String inputFileFormatClassName = pd.getInputFileFormatClassName(); + // Always collect input file formats. + inputFileFormatClassNameSet.add(inputFileFormatClassName); + + boolean isInputFileFormatVectorized = Utilities.isInputFileFormatVectorized(pd); + + if (isAcidTable) { + + // Today, ACID tables are only ORC and that format is vectorizable. Verify these + // assumptions. + Preconditions.checkState(isInputFileFormatVectorized); + Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName())); + + if (!useVectorizedInputFileFormat) { + enabledConditionsNotMetList.add( + "Vectorizing ACID tables requires " + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + return false; + } + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat( + inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + return true; + } + // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface // and reads VectorizedRowBatch as a "row". - if (isAcidTable || useVectorizedInputFileFormat) { - - if (Utilities.isInputFileFormatVectorized(pd)) { + if (useVectorizedInputFileFormat) { - if (!useVectorizedInputFileFormat) { - LOG.info("ACID tables con only be vectorized for the input file format -- " + - "i.e. when Hive Configuration option " + - HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname + - "=true"); - return false; - } + if (isInputFileFormatVectorized) { pd.setVectorPartitionDesc( VectorPartitionDesc.createVectorizedInputFileFormat( inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); return true; } - - // Today, ACID tables are only ORC and that format is vectorizable. Verify this - // assumption. - Preconditions.checkState(!isAcidTable); + // Fall through and look for other options... } - if (!(isSchemaEvolution || isAcidTable) && - (useVectorDeserialize || useRowDeserialize)) { - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " when both " + HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION.varname + "=false and " + - " ACID table is " + isAcidTable + " and " + - " given the Hive Configuration options " + getHiveOptionsString()); - return false; + if (!isSchemaEvolution) { + enabledConditionsNotMetList.add( + "Vectorizing tables without Schema Evolution requires " + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); } String deserializerClassName = pd.getDeserializerClassName(); @@ -635,6 +757,12 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable // // Do the "vectorized" row-by-row deserialization into a VectorizedRowBatch in the // VectorMapOperator. 
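
The three reading modes described in the comment block preceding verifyAndSetVectorPartDesc form a strict preference ladder, which can be sketched with plain booleans (illustrative only; the real checks also consult the input format and serde class names, as the following lines show):

    public class ReadModeSketch {
      enum Mode { VECTORIZED_INPUT_FILE_FORMAT, VECTOR_DESERIALIZE, ROW_DESERIALIZE, NONE }

      static Mode choose(boolean formatIsVectorized, boolean deserializeEligible,
          boolean useVectorizedInputFileFormat, boolean useVectorDeserialize,
          boolean useRowDeserialize) {
        if (useVectorizedInputFileFormat && formatIsVectorized) {
          return Mode.VECTORIZED_INPUT_FILE_FORMAT;  // fastest: reads batches directly
        }
        if (useVectorDeserialize && deserializeEligible) {
          return Mode.VECTOR_DESERIALIZE;            // text/sequence fast deserializers
        }
        if (useRowDeserialize) {
          return Mode.ROW_DESERIALIZE;               // catch-all: SerDe row by row
        }
        return Mode.NONE;
      }

      public static void main(String[] args) {
        System.out.println(choose(true, false, true, true, true));
      }
    }
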
+ boolean isTextFormat = inputFileFormatClassName.equals(TextInputFormat.class.getName()) && + deserializerClassName.equals(LazySimpleSerDe.class.getName()); + boolean isSequenceFormat = + inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && + deserializerClassName.equals(LazyBinarySerDe.class.getName()); + boolean isVectorDeserializeEligable = isTextFormat || isSequenceFormat; if (useVectorDeserialize) { @@ -648,8 +776,7 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable // org.apache.hadoop.mapred.SequenceFileInputFormat // org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - if (inputFileFormatClassName.equals(TextInputFormat.class.getName()) && - deserializerClassName.equals(LazySimpleSerDe.class.getName())) { + if (isTextFormat) { Properties properties = pd.getTableDesc().getProperties(); String lastColumnTakesRestString = @@ -659,10 +786,11 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable lastColumnTakesRestString.equalsIgnoreCase("true")); if (lastColumnTakesRest) { - // If row mode will not catch this, then inform. + // If row mode will not catch this input file format, then not enabled. if (useRowDeserialize) { - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " when " + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST + "is true"); + enabledConditionsNotMetList.add( + inputFileFormatClassName + " " + + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST + " must be disabled "); return false; } } else { @@ -670,17 +798,19 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable VectorPartitionDesc.createVectorDeserialize( inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE)); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); return true; } - } else if (inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && - deserializerClassName.equals(LazyBinarySerDe.class.getName())) { + } else if (isSequenceFormat) { pd.setVectorPartitionDesc( VectorPartitionDesc.createVectorDeserialize( inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY)); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); return true; } + // Fall through and look for other options... } // Otherwise, if enabled, deserialize rows using regular Serde and add the object @@ -694,17 +824,29 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable Utilities.isInputFileFormatSelfDescribing(pd), deserializerClassName)); + enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); return true; } - LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + - " given the Hive Configuration options " + getHiveOptionsString()); - + if (isInputFileFormatVectorized) { + Preconditions.checkState(!useVectorizedInputFileFormat); + enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + } else { + // Only offer these when the input file format is not the fast vectorized formats. + if (isVectorDeserializeEligable) { + Preconditions.checkState(!useVectorDeserialize); + enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); + } else { + // Since row mode takes everyone. 
+ enabledConditionsNotMetList.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); + } + } + return false; } - private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, + private ImmutablePair validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { @@ -732,27 +874,39 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al LinkedHashMap> pathToAliases = mapWork.getPathToAliases(); LinkedHashMap pathToPartitionInfo = mapWork.getPathToPartitionInfo(); + + // Remember the input file formats we validated and why. + HashSet inputFileFormatClassNameSet = new HashSet(); + HashSet enabledConditionsMetSet = new HashSet(); + ArrayList enabledConditionsNotMetList = new ArrayList(); + for (Entry> entry: pathToAliases.entrySet()) { Path path = entry.getKey(); List aliases = entry.getValue(); boolean isPresent = (aliases != null && aliases.indexOf(alias) != -1); if (!isPresent) { - LOG.info("Alias " + alias + " not present in aliases " + aliases); - return false; + setOperatorIssue("Alias " + alias + " not present in aliases " + aliases); + return new ImmutablePair(false, false); } PartitionDesc partDesc = pathToPartitionInfo.get(path); if (partDesc.getVectorPartitionDesc() != null) { // We seen this already. continue; } - if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable)) { - return false; + if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable, inputFileFormatClassNameSet, + enabledConditionsMetSet, enabledConditionsNotMetList)) { + + // Always set these so EXPLAIN can see. + mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet); + mapWork.setVectorizationEnabledConditionsMet(new ArrayList(enabledConditionsMetSet)); + mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList); + + // We consider this an enable issue, not a not vectorized issue. + LOG.info("Cannot enable vectorization because input file format(s) " + inputFileFormatClassNameSet + + " do not meet conditions " + VectorizationCondition.addBooleans(enabledConditionsNotMetList, false)); + return new ImmutablePair(false, true); } VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); - if (LOG.isInfoEnabled()) { - LOG.info("Vectorizer path: " + path + ", " + vectorPartDesc.toString() + - ", aliases " + aliases); - } if (isFirst) { @@ -796,13 +950,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al * implicitly defaulted to null. 
*/ if (nextDataColumnList.size() > tableDataColumnList.size()) { - LOG.info( + setOperatorIssue( String.format( "Could not vectorize partition %s " + "(deserializer " + deserializer.getClass().getName() + ")" + "The partition column names %d is greater than the number of table columns %d", path, nextDataColumnList.size(), tableDataColumnList.size())); - return false; + return new ImmutablePair(false, false); } if (!(deserializer instanceof NullStructSerDe)) { @@ -811,13 +965,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al String nextColumnName = nextDataColumnList.get(i); String tableColumnName = tableDataColumnList.get(i); if (!nextColumnName.equals(tableColumnName)) { - LOG.info( + setOperatorIssue( String.format( "Could not vectorize partition %s " + "(deserializer " + deserializer.getClass().getName() + ")" + "The partition column name %s is does not match table column name %s", path, nextColumnName, tableColumnName)); - return false; + return new ImmutablePair(false, false); } } } @@ -852,7 +1006,12 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al // Helps to keep this for debugging. vectorTaskColumnInfo.setTableScanOperator(tableScanOperator); - return true; + // Always set these so EXPLAIN can see. + mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet); + mapWork.setVectorizationEnabledConditionsMet(new ArrayList(enabledConditionsMetSet)); + mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList); + + return new ImmutablePair(true, false); } private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) @@ -860,19 +1019,35 @@ private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask LOG.info("Validating MapWork..."); - ImmutablePair pair = verifyOnlyOneTableScanOperator(mapWork); - if (pair == null) { + ImmutablePair onlyOneTableScanPair = verifyOnlyOneTableScanOperator(mapWork); + if (onlyOneTableScanPair == null) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + Preconditions.checkState(notVectorizedReason != null); + mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); return false; } - String alias = pair.left; - TableScanOperator tableScanOperator = pair.right; + String alias = onlyOneTableScanPair.left; + TableScanOperator tableScanOperator = onlyOneTableScanPair.right; // This call fills in the column names, types, and partition column count in // vectorTaskColumnInfo. - if (!validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo)) { + currentOperator = tableScanOperator; + ImmutablePair validateInputFormatAndSchemaEvolutionPair = + validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo); + if (!validateInputFormatAndSchemaEvolutionPair.left) { + // Have we already set the enabled conditions not met? + if (!validateInputFormatAndSchemaEvolutionPair.right) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + Preconditions.checkState(notVectorizedReason != null); + mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); + } return false; } + // Now we are enabled and any issues found from here on out are considered + // not vectorized issues. 
+ mapWork.setVectorizationEnabled(true); + Map opRules = new LinkedHashMap(); MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez); addMapWorkRules(opRules, vnp); @@ -923,11 +1098,34 @@ private void vectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskCo return; } - private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException { + private void setReduceWorkExplainConditions(ReduceWork reduceWork) { + + reduceWork.setVectorizationExamined(true); + + reduceWork.setReduceVectorizationEnabled(isReduceVectorizationEnabled); + reduceWork.setVectorReduceEngine( + HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)); + } + + private void convertReduceWork(ReduceWork reduceWork) throws SemanticException { + + // Global used when setting errors, etc. + currentBaseWork = reduceWork; + currentBaseWork.setVectorizationEnabled(true); + VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); - boolean ret = validateReduceWork(reduceWork, vectorTaskColumnInfo, isTez); + vectorTaskColumnInfo.assume(); + + boolean ret = validateReduceWork(reduceWork, vectorTaskColumnInfo); if (ret) { - vectorizeReduceWork(reduceWork, vectorTaskColumnInfo, isTez); + vectorizeReduceWork(reduceWork, vectorTaskColumnInfo); + } else if (currentBaseWork.getVectorizationEnabled()) { + VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + LOG.info("Cannot vectorize: unknown"); + } else { + LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); + } } } @@ -941,13 +1139,14 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, // Check key ObjectInspector. ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector(); if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) { + setNodeIssue("Key object inspector missing or not StructObjectInspector"); return false; } StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector; List keyFields = keyStructObjectInspector.getAllStructFieldRefs(); - // Tez doesn't use tagging... 
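These reduce-side checks (getOnlyStructObjectInspectors and friends) follow the same convention used throughout this patch: every early `return false` is preceded by a call that records why (setNodeIssue / setOperatorIssue / setExpressionIssue), so the reason survives to EXPLAIN output instead of living only in the log. A rough sketch of that mechanism, with assumed names for the recorder and its fields:

// Illustrative sketch only; Reason and ReasonRecorder are stand-ins for the
// VectorizerReason/currentBaseWork plumbing in the patch.
class ReasonRecorder {
  static class Reason {
    final String scope;   // e.g. an operator name, an expression title, or "node"
    final String issue;
    Reason(String scope, String issue) { this.scope = scope; this.issue = issue; }
    @Override public String toString() { return scope + ": " + issue; }
  }

  private Reason notVectorizedReason;  // mirrors currentBaseWork.getNotVectorizedReason()

  void setNodeIssue(String issue) { notVectorizedReason = new Reason("node", issue); }
  void setOperatorIssue(String op, String issue) { notVectorizedReason = new Reason(op, issue); }

  boolean validateValueInspector(Object objectInspector) {
    if (objectInspector == null) {
      setNodeIssue("Value object inspector missing or not StructObjectInspector");
      return false;  // caller reads the recorded reason later for EXPLAIN
    }
    return true;
  }

  Reason reason() { return notVectorizedReason; }
}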
if (reduceWork.getNeedsTagging()) { + setNodeIssue("Tez doesn't use tagging"); return false; } @@ -955,6 +1154,7 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector(); if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) { + setNodeIssue("Value object inspector missing or not StructObjectInspector"); return false; } StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector; @@ -978,13 +1178,8 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, return true; } - private void addReduceWorkRules(Map opRules, NodeProcessor np) { - opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np); - opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np); - } - private boolean validateReduceWork(ReduceWork reduceWork, - VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException { + VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { LOG.info("Validating ReduceWork..."); @@ -1015,7 +1210,7 @@ private boolean validateReduceWork(ReduceWork reduceWork, } private void vectorizeReduceWork(ReduceWork reduceWork, - VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException { + VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { LOG.info("Vectorizing ReduceWork..."); reduceWork.setVectorMode(true); @@ -1025,7 +1220,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork, // VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker. Map opRules = new LinkedHashMap(); ReduceWorkVectorizationNodeProcessor vnp = - new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo, isTez); + new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo); addReduceWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new PreOrderWalker(disp); @@ -1077,6 +1272,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return new Boolean(true); } boolean ret; + currentOperator = op; try { ret = validateMapWorkOperator(op, mapWork, isTez); } catch (Exception e) { @@ -1119,6 +1315,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (nonVectorizedOps.contains(op)) { return new Boolean(true); } + currentOperator = op; boolean ret = validateReduceWorkOperator(op); if (!ret) { LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized."); @@ -1142,9 +1339,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // The vectorization context for the Map or Reduce task. 
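The node processors just below also keep doVectorize()'s memoization: an operator shared by several walk paths is converted at most once, and the original-to-vectorized mapping is remembered so later visits return the existing replacement. A self-contained sketch of that bookkeeping, with a generic stand-in for the operator type:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.UnaryOperator;

// Sketch of the opsDone/opToVectorOpMap pattern in doVectorize() (names illustrative).
class OpConversionCache<T> {
  private final Set<T> opsDone = new HashSet<>();
  private final Map<T, T> opToVectorOp = new HashMap<>();

  T doVectorize(T op, UnaryOperator<T> vectorize) {
    if (!opsDone.contains(op)) {
      T vectorOp = vectorize.apply(op);
      opsDone.add(op);
      if (vectorOp != op) {
        opToVectorOp.put(op, vectorOp);
        opsDone.add(vectorOp);  // the replacement itself needs no further conversion
      }
      return vectorOp;
    }
    return opToVectorOp.getOrDefault(op, op);
  }
}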
protected VectorizationContext taskVectorizationContext; + protected final VectorTaskColumnInfo vectorTaskColumnInfo; protected final Set> nonVectorizedOps; - VectorizationNodeProcessor(Set> nonVectorizedOps) { + VectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo, + Set> nonVectorizedOps) { + this.vectorTaskColumnInfo = vectorTaskColumnInfo; this.nonVectorizedOps = nonVectorizedOps; } @@ -1192,11 +1392,11 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac } public Operator doVectorize(Operator op, - VectorizationContext vContext, boolean isTez) throws SemanticException { + VectorizationContext vContext, boolean isEnhanced) throws SemanticException { Operator vectorOp = op; try { if (!opsDone.contains(op)) { - vectorOp = vectorizeOperator(op, vContext, isTez); + vectorOp = vectorizeOperator(op, vContext, isEnhanced, vectorTaskColumnInfo); opsDone.add(op); if (vectorOp != op) { opToVectorOpMap.put(op, vectorOp); @@ -1224,7 +1424,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) { - super(vectorTaskColumnInfo.getNonVectorizedOps()); + super(vectorTaskColumnInfo, vectorTaskColumnInfo.getNonVectorizedOps()); this.mWork = mWork; this.vectorTaskColumnInfo = vectorTaskColumnInfo; this.isTez = isTez; @@ -1241,6 +1441,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, VectorizationContext vContext = null; + currentOperator = op; if (op instanceof TableScanOperator) { if (taskVectorizationContext == null) { taskVectorizationContext = getVectorizationContext(op.getName(), vectorTaskColumnInfo); @@ -1279,7 +1480,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private final VectorTaskColumnInfo vectorTaskColumnInfo; - private final boolean isTez; private Operator rootVectorOp; @@ -1287,13 +1487,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return rootVectorOp; } - public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo, - boolean isTez) { + public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo) { - super(vectorTaskColumnInfo.getNonVectorizedOps()); + super(vectorTaskColumnInfo, vectorTaskColumnInfo.getNonVectorizedOps()); this.vectorTaskColumnInfo = vectorTaskColumnInfo; rootVectorOp = null; - this.isTez = isTez; } @Override @@ -1309,6 +1507,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean saveRootVectorOp = false; + currentOperator = op; if (op.getParentOperators().size() == 0) { LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + vectorTaskColumnInfo.allColumnNames.toString()); @@ -1333,7 +1532,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, assert vContext != null; LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); - Operator vectorOp = doVectorize(op, vContext, isTez); + Operator vectorOp = doVectorize(op, vContext, true); if (LOG.isDebugEnabled()) { if (vectorOp instanceof VectorizationContextRegion) { @@ -1390,6 +1589,10 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE); + isReduceVectorizationEnabled = + HiveConf.getBoolVar(hiveConf, + 
HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED); + isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -1407,7 +1610,7 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE return physicalContext; } - boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTez) { + boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isEnhanced) { boolean ret = false; switch (op.getType()) { case MAPJOIN: @@ -1418,7 +1621,7 @@ boolean validateMapWorkOperator(Operator op, MapWork mWo } break; case GROUPBY: - ret = validateGroupByOperator((GroupByOperator) op, false, isTez); + ret = validateGroupByOperator((GroupByOperator) op, false, isEnhanced); break; case FILTER: ret = validateFilterOperator((FilterOperator) op); @@ -1512,7 +1715,7 @@ private Boolean isVectorizedGroupByThatOutputsRows(Operator filterExprs = desc.getFilters().get(posBigTable); - if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) { - LOG.info("Cannot vectorize map work filter expression"); + if (!validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER)) { return false; } List keyExprs = desc.getKeys().get(posBigTable); - if (!validateExprNodeDesc(keyExprs)) { - LOG.info("Cannot vectorize map work key expression"); + if (!validateExprNodeDesc(keyExprs, "Key")) { return false; } List valueExprs = desc.getExprs().get(posBigTable); - if (!validateExprNodeDesc(valueExprs)) { - LOG.info("Cannot vectorize map work value expression"); + if (!validateExprNodeDesc(valueExprs, "Value")) { return false; } Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); - if (!validateExprNodeDesc(smallTableExprs)) { - LOG.info("Cannot vectorize map work small table expression"); + if (!validateExprNodeDesc(smallTableExprs, "Small Table")) { return false; } return true; @@ -1571,24 +1771,23 @@ private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op List filterExprs = desc.getFilters().get(tag); List keyExprs = desc.getKeys().get(tag); List valueExprs = desc.getExprs().get(tag); - return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) && - validateExprNodeDesc(keyExprs) && validateExprNodeDesc(valueExprs); + return validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER) && + validateExprNodeDesc(keyExprs, "Key") && validateExprNodeDesc(valueExprs, "Value"); } private boolean validateReduceSinkOperator(ReduceSinkOperator op) { List keyDescs = op.getConf().getKeyCols(); List partitionDescs = op.getConf().getPartitionCols(); List valueDesc = op.getConf().getValueCols(); - return validateExprNodeDesc(keyDescs) && validateExprNodeDesc(partitionDescs) && - validateExprNodeDesc(valueDesc); + return validateExprNodeDesc(keyDescs, "Key") && validateExprNodeDesc(partitionDescs, "Partition") && + validateExprNodeDesc(valueDesc, "Value"); } private boolean validateSelectOperator(SelectOperator op) { List descList = op.getConf().getColList(); for (ExprNodeDesc desc : descList) { - boolean ret = validateExprNodeDesc(desc); + boolean ret = validateExprNodeDesc(desc, "Select"); if (!ret) { - LOG.info("Cannot vectorize select expression: " + desc.toString()); return false; } } @@ -1597,28 +1796,26 @@ private boolean validateSelectOperator(SelectOperator op) { private boolean 
validateFilterOperator(FilterOperator op) { ExprNodeDesc desc = op.getConf().getPredicate(); - return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.FILTER); + return validateExprNodeDesc(desc, "Predicate", VectorExpressionDescriptor.Mode.FILTER); } - private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTez) { + private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isEnhanced) { GroupByDesc desc = op.getConf(); - VectorGroupByDesc vectorDesc = desc.getVectorDesc(); if (desc.isGroupingSetsPresent()) { - LOG.info("Grouping sets not supported in vector mode"); + setOperatorIssue("Grouping sets not supported"); return false; } if (desc.pruneGroupingSetId()) { - LOG.info("Pruning grouping set id not supported in vector mode"); + setOperatorIssue("Pruning grouping set id not supported"); return false; } if (desc.getMode() != GroupByDesc.Mode.HASH && desc.isDistinct()) { - LOG.info("DISTINCT not supported in vector mode"); + setOperatorIssue("DISTINCT not supported"); return false; } - boolean ret = validateExprNodeDesc(desc.getKeys()); + boolean ret = validateExprNodeDesc(desc.getKeys(), "Key"); if (!ret) { - LOG.info("Cannot vectorize groupby key expression " + desc.getKeys().toString()); return false; } @@ -1731,6 +1928,9 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo // If all the aggregation outputs are primitive, we can output VectorizedRowBatch. // Otherwise, we the rest of the operator tree will be row mode. + VectorGroupByDesc vectorDesc = new VectorGroupByDesc(); + desc.setVectorDesc(vectorDesc); + vectorDesc.setVectorOutput(retPair.right); vectorDesc.setProcessingMode(processingMode); @@ -1745,14 +1945,15 @@ private boolean validateFileSinkOperator(FileSinkOperator op) { return true; } - private boolean validateExprNodeDesc(List descs) { - return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION); + private boolean validateExprNodeDesc(List descs, String expressionTitle) { + return validateExprNodeDesc(descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); } private boolean validateExprNodeDesc(List descs, + String expressionTitle, VectorExpressionDescriptor.Mode mode) { for (ExprNodeDesc d : descs) { - boolean ret = validateExprNodeDesc(d, mode); + boolean ret = validateExprNodeDesc(d, expressionTitle, mode); if (!ret) { return false; } @@ -1775,19 +1976,20 @@ private boolean validateExprNodeDesc(List descs, return new Pair(true, outputIsPrimitive); } - private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { + private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, + VectorExpressionDescriptor.Mode mode) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; // Currently, we do not support vectorized virtual columns (see HIVE-5570). 
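validateExprNodeDescRecursive, shown below, now threads an expressionTitle through every level so a recorded issue names the expression's role (Key, Value, Predicate, Select, Small Table, ...). Note that child expressions are always validated in the looser FILTER mode regardless of the parent's mode, as the comments in the patch call out. A compact sketch of that recursion shape, using a stand-in expression type:

import java.util.List;

// Sketch of the recursive validation shape; ExprNode and the type rule are stand-ins.
class ExprValidator {
  enum Mode { FILTER, PROJECTION }

  static class ExprNode {
    final String type;
    final List<ExprNode> children;
    ExprNode(String type, List<ExprNode> children) { this.type = type; this.children = children; }
  }

  String issue;  // last recorded issue, tagged with the expression title

  boolean validateRecursive(ExprNode desc, String title, Mode mode) {
    if (!isSupportedType(desc.type, mode)) {
      issue = title + " expression: data type " + desc.type + " not supported";
      return false;
    }
    for (ExprNode child : desc.children) {
      // Children are deliberately checked in loose FILTER mode, as in the patch.
      if (!validateRecursive(child, title, Mode.FILTER)) {
        return false;
      }
    }
    return true;
  }

  private boolean isSupportedType(String type, Mode mode) {
    return !type.equals("map");  // placeholder rule for the sketch
  }
}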
if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) { - LOG.info("Cannot vectorize virtual column " + c.getColumn()); + setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")"); return false; } } String typeName = desc.getTypeInfo().getTypeName(); boolean ret = validateDataType(typeName, mode); if (!ret) { - LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName); + setExpressionIssue(expressionTitle, "Data type " + typeName + " of " + desc.toString() + " not supported"); return false; } boolean isInExpression = false; @@ -1795,7 +1997,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc; boolean r = validateGenericUdf(d); if (!r) { - LOG.info("Cannot vectorize UDF " + d); + setExpressionIssue(expressionTitle, "UDF " + d + " not supported"); return false; } GenericUDF genericUDF = d.getGenericUDF(); @@ -1806,14 +2008,14 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. - if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } else { for (ExprNodeDesc d : desc.getChildren()) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. - if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateExprNodeDescRecursive(d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } @@ -1823,7 +2025,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio } private boolean validateStructInExpression(ExprNodeDesc desc, - VectorExpressionDescriptor.Mode mode) { + String expressionTitle, VectorExpressionDescriptor.Mode mode) { for (ExprNodeDesc d : desc.getChildren()) { TypeInfo typeInfo = d.getTypeInfo(); if (typeInfo.getCategory() != Category.STRUCT) { @@ -1839,7 +2041,8 @@ private boolean validateStructInExpression(ExprNodeDesc desc, TypeInfo fieldTypeInfo = fieldTypeInfos.get(f); Category category = fieldTypeInfo.getCategory(); if (category != Category.PRIMITIVE) { - LOG.info("Cannot vectorize struct field " + fieldNames.get(f) + setExpressionIssue(expressionTitle, + "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName()); return false; } @@ -1852,7 +2055,8 @@ private boolean validateStructInExpression(ExprNodeDesc desc, if (inConstantType != InConstantType.INT_FAMILY && inConstantType != InConstantType.FLOAT_FAMILY && inConstantType != InConstantType.STRING_FAMILY) { - LOG.info("Cannot vectorize struct field " + fieldNames.get(f) + setExpressionIssue(expressionTitle, + "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName()); return false; } @@ -1861,31 +2065,31 @@ private boolean validateStructInExpression(ExprNodeDesc desc, return true; } - private boolean validateExprNodeDesc(ExprNodeDesc desc) { - return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.PROJECTION); + private boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle) { + return validateExprNodeDesc(desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); } - boolean validateExprNodeDesc(ExprNodeDesc desc, 
VectorExpressionDescriptor.Mode mode) { - if (!validateExprNodeDescRecursive(desc, mode)) { + boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle, + VectorExpressionDescriptor.Mode mode) { + if (!validateExprNodeDescRecursive(desc, expressionTitle, mode)) { return false; } try { VectorizationContext vc = new ValidatorVectorizationContext(hiveConf); if (vc.getVectorExpression(desc, mode) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getVectorExpression returned null"); + setExpressionIssue(expressionTitle, "getVectorExpression returned null"); return false; } } catch (Exception e) { if (e instanceof HiveException) { - LOG.info(e.getMessage()); + setExpressionIssue(expressionTitle, e.getMessage()); } else { if (LOG.isDebugEnabled()) { // Show stack trace. LOG.debug("Failed to vectorize", e); - } else { - LOG.info("Failed to vectorize", e.getMessage()); } + setExpressionIssue(expressionTitle, e.getMessage()); } return false; } @@ -1905,9 +2109,9 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { } } - private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) { + public static ObjectInspector.Category aggregationOutputCategory(VectorAggregateExpression vectorAggrExpr) { ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); - return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE); + return outputObjInspector.getCategory(); } private Pair validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, @@ -1915,11 +2119,10 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA String udfName = aggDesc.getGenericUDAFName().toLowerCase(); if (!supportedAggregationUdfs.contains(udfName)) { - LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); + setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported"); return new Pair(false, false); } - if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { - LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); + if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) { return new Pair(false, false); } @@ -1933,6 +2136,7 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA if (LOG.isDebugEnabled()) { LOG.debug("Vectorization of aggreation should have succeeded ", e); } + setExpressionIssue("Aggregation Function", "Vectorization of aggregation should have succeeded: " + e); return new Pair(false, false); } if (LOG.isDebugEnabled()) { @@ -1940,11 +2144,12 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA " vector expression " + vectorAggrExpr.toString()); } - boolean outputIsPrimitive = validateAggregationIsPrimitive(vectorAggrExpr); + ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr); + boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE); if (processingMode == ProcessingMode.MERGE_PARTIAL && hasKeys && !outputIsPrimitive) { - LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); + setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); return new Pair(false, false); } @@
-2012,12 +2217,12 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { if (smallTableIndices[i] < 0) { // Negative numbers indicate a column to be (deserialize) read from the small table's // LazyBinary value row. - LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false"); + setOperatorIssue("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false"); return false; } } } else if (smallTableRetainSize > 0) { - LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false"); + setOperatorIssue("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false"); return false; } @@ -2026,20 +2231,21 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { } Operator specializeMapJoinOperator(Operator op, - VectorizationContext vContext, MapJoinDesc desc) throws HiveException { + VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinInfo vectorMapJoinInfo) + throws HiveException { Operator vectorOp = null; Class> opClass = null; - VectorMapJoinDesc.HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; - VectorMapJoinDesc.HashTableKind hashTableKind = HashTableKind.NONE; - VectorMapJoinDesc.HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; + VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); + + HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; + HashTableKind hashTableKind = HashTableKind.NONE; + HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; + OperatorVariation operatorVariation = OperatorVariation.NONE; - if (HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) { + if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; } else { - // Restrict to using BytesBytesMultiHashMap via MapJoinBytesTableContainer or - // HybridHashTableContainer. hashTableImplementationType = HashTableImplementationType.OPTIMIZED; } @@ -2061,20 +2267,31 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { Map> keyExprs = desc.getKeys(); List bigTableKeyExprs = keyExprs.get(posBigTable); if (bigTableKeyExprs.size() == 1) { - String typeName = bigTableKeyExprs.get(0).getTypeString(); - LOG.info("Vectorizer vectorizeOperator map join typeName " + typeName); - if (typeName.equals("boolean")) { + TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo(); + LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName()); + switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { + case BOOLEAN: hashTableKeyType = HashTableKeyType.BOOLEAN; - } else if (typeName.equals("tinyint")) { + break; + case BYTE: hashTableKeyType = HashTableKeyType.BYTE; - } else if (typeName.equals("smallint")) { + break; + case SHORT: hashTableKeyType = HashTableKeyType.SHORT; - } else if (typeName.equals("int")) { + break; + case INT: hashTableKeyType = HashTableKeyType.INT; - } else if (typeName.equals("bigint") || typeName.equals("long")) { + break; + case LONG: hashTableKeyType = HashTableKeyType.LONG; - } else if (VectorizationContext.isStringFamily(typeName)) { + break; + case STRING: + case CHAR: + case VARCHAR: + case BINARY: hashTableKeyType = HashTableKeyType.STRING; + break; + default: + // Stay with multi-key.
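The old chain of typeName string comparisons becomes a switch over PrimitiveCategory: bigint/long collapse into the LONG case, and the string family (STRING, CHAR, VARCHAR, BINARY) shares HashTableKeyType.STRING (a break is added after that assignment above, since falling into the empty default is legal but easy to misread). Summarized as a stand-alone sketch, with local enums standing in for the Hive ones:

// Stand-alone sketch of the key-type mapping above (both enums are local stand-ins).
enum PrimitiveCategory { BOOLEAN, BYTE, SHORT, INT, LONG, STRING, CHAR, VARCHAR, BINARY, DECIMAL }
enum HashTableKeyType { NONE, BOOLEAN, BYTE, SHORT, INT, LONG, STRING, MULTI_KEY }

class KeyTypeMapper {
  static HashTableKeyType map(PrimitiveCategory cat) {
    switch (cat) {
      case BOOLEAN: return HashTableKeyType.BOOLEAN;
      case BYTE:    return HashTableKeyType.BYTE;
      case SHORT:   return HashTableKeyType.SHORT;
      case INT:     return HashTableKeyType.INT;
      case LONG:    return HashTableKeyType.LONG;
      case STRING:
      case CHAR:
      case VARCHAR:
      case BINARY:  return HashTableKeyType.STRING;
      default:      return HashTableKeyType.MULTI_KEY;  // stay with multi-key
    }
  }
}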
} } } @@ -2082,16 +2299,20 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { switch (joinType) { case JoinDesc.INNER_JOIN: if (!isInnerBigOnly) { + operatorVariation = OperatorVariation.INNER; hashTableKind = HashTableKind.HASH_MAP; } else { + operatorVariation = OperatorVariation.INNER_BIG_ONLY; hashTableKind = HashTableKind.HASH_MULTISET; } break; case JoinDesc.LEFT_OUTER_JOIN: case JoinDesc.RIGHT_OUTER_JOIN: + operatorVariation = OperatorVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; case JoinDesc.LEFT_SEMI_JOIN: + operatorVariation = OperatorVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; break; default: @@ -2106,86 +2327,84 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case SHORT: case INT: case LONG: - switch (joinType) { - case JoinDesc.INNER_JOIN: - if (!isInnerBigOnly) { - opClass = VectorMapJoinInnerLongOperator.class; - } else { - opClass = VectorMapJoinInnerBigOnlyLongOperator.class; - } + switch (operatorVariation) { + case INNER: + opClass = VectorMapJoinInnerLongOperator.class; break; - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - opClass = VectorMapJoinOuterLongOperator.class; + case INNER_BIG_ONLY: + opClass = VectorMapJoinInnerBigOnlyLongOperator.class; break; - case JoinDesc.LEFT_SEMI_JOIN: + case LEFT_SEMI: opClass = VectorMapJoinLeftSemiLongOperator.class; break; + case OUTER: + opClass = VectorMapJoinOuterLongOperator.class; + break; default: - throw new HiveException("Unknown join type " + joinType); + throw new HiveException("Unknown operator variation " + operatorVariation); } break; case STRING: - switch (joinType) { - case JoinDesc.INNER_JOIN: - if (!isInnerBigOnly) { - opClass = VectorMapJoinInnerStringOperator.class; - } else { - opClass = VectorMapJoinInnerBigOnlyStringOperator.class; - } + switch (operatorVariation) { + case INNER: + opClass = VectorMapJoinInnerStringOperator.class; break; - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - opClass = VectorMapJoinOuterStringOperator.class; + case INNER_BIG_ONLY: + opClass = VectorMapJoinInnerBigOnlyStringOperator.class; break; - case JoinDesc.LEFT_SEMI_JOIN: + case LEFT_SEMI: opClass = VectorMapJoinLeftSemiStringOperator.class; break; + case OUTER: + opClass = VectorMapJoinOuterStringOperator.class; + break; default: - throw new HiveException("Unknown join type " + joinType); + throw new HiveException("Unknown operator variation " + operatorVariation); } break; case MULTI_KEY: - switch (joinType) { - case JoinDesc.INNER_JOIN: - if (!isInnerBigOnly) { - opClass = VectorMapJoinInnerMultiKeyOperator.class; - } else { - opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class; - } + switch (operatorVariation) { + case INNER: + opClass = VectorMapJoinInnerMultiKeyOperator.class; break; - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - opClass = VectorMapJoinOuterMultiKeyOperator.class; + case INNER_BIG_ONLY: + opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class; break; - case JoinDesc.LEFT_SEMI_JOIN: + case LEFT_SEMI: opClass = VectorMapJoinLeftSemiMultiKeyOperator.class; break; + case OUTER: + opClass = VectorMapJoinOuterMultiKeyOperator.class; + break; default: - throw new HiveException("Unknown join type " + joinType); + throw new HiveException("Unknown operator variation " + operatorVariation); } break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); } - vectorOp = OperatorFactory.getVectorOperator( - opClass, 
op.getCompilationOpContext(), op.getConf(), vContext); - LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName()); - boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED); - VectorMapJoinDesc vectorDesc = desc.getVectorDesc(); vectorDesc.setHashTableImplementationType(hashTableImplementationType); vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); + vectorDesc.setOperatorVariation(operatorVariation); vectorDesc.setMinMaxEnabled(minMaxEnabled); + vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), op.getConf(), vContext); + LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName()); + return vectorOp; } - private boolean onExpressionHasNullSafes(MapJoinDesc desc) { + public static boolean onExpressionHasNullSafes(MapJoinDesc desc) { boolean[] nullSafes = desc.getNullSafes(); if (nullSafes == null) { - return false; + return false; } for (boolean nullSafe : nullSafes) { if (nullSafe) { @@ -2196,53 +2415,372 @@ private boolean onExpressionHasNullSafes(MapJoinDesc desc) { } private boolean canSpecializeMapJoin(Operator op, MapJoinDesc desc, - boolean isTez) { + boolean isEnhanced, VectorizationContext vContext, VectorMapJoinInfo vectorMapJoinInfo) + throws HiveException { + + Preconditions.checkState(op instanceof MapJoinOperator); + + // Allocate a VectorMapJoinDesc initially with implementation type NONE so EXPLAIN + // can report this operator was vectorized, but not native. And, the conditions. + VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc(); + desc.setVectorDesc(vectorDesc); + + boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED); + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + boolean oneMapJoinCondition = (desc.getConds().length == 1); + + boolean hasNullSafes = onExpressionHasNullSafes(desc); + + byte posBigTable = (byte) desc.getPosBigTable(); - boolean specialize = false; + // Since we want to display all the met and not met conditions in EXPLAIN, we determine all + // information first.... - if (op instanceof MapJoinOperator && + List keyDesc = desc.getKeys().get(posBigTable); + VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); + final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length; + boolean isEmptyKey = (allBigTableKeyExpressionsLength == 0); + + boolean supportsKeyTypes = true; // Assume. + HashSet notSupportedKeyTypes = new HashSet(); + + // Since a key expression can be a calculation and the key will go into a scratch column, + // we need the mapping and type information.
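The loop that follows separates plain column references from computed keys: every key contributes its output column to bigTableKeyColumnMap, but only non-identity expressions need to be evaluated at run time (the same split is repeated further down for the value expressions). A self-contained sketch of that split, with stand-in types:

import java.util.List;

// Sketch of the identity-vs-computed split for key/value expressions (types are stand-ins).
class ColumnMapBuilder {
  static class VecExpr {
    final int outputColumn;
    final boolean isIdentity;   // plain column reference, nothing to evaluate
    VecExpr(int outputColumn, boolean isIdentity) {
      this.outputColumn = outputColumn;
      this.isIdentity = isIdentity;
    }
  }

  static int[] build(List<VecExpr> all, List<VecExpr> toEvaluate) {
    int[] columnMap = new int[all.size()];
    for (int i = 0; i < all.size(); i++) {
      VecExpr ve = all.get(i);
      if (!ve.isIdentity) {
        toEvaluate.add(ve);              // only calculations must run; identities are just mapped
      }
      columnMap[i] = ve.outputColumn;    // scratch column for calculations, source otherwise
    }
    return columnMap;
  }
}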
+ int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength]; + String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength]; + TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength]; + ArrayList bigTableKeyExpressionsList = new ArrayList(); + VectorExpression[] bigTableKeyExpressions; + for (int i = 0; i < allBigTableKeyExpressionsLength; i++) { + VectorExpression ve = allBigTableKeyExpressions[i]; + if (!IdentityExpression.isColumnOnly(ve)) { + bigTableKeyExpressionsList.add(ve); + } + bigTableKeyColumnMap[i] = ve.getOutputColumn(); + + ExprNodeDesc exprNode = keyDesc.get(i); + bigTableKeyColumnNames[i] = exprNode.toString(); + + TypeInfo typeInfo = exprNode.getTypeInfo(); + // Verify we handle the key column types for an optimized table. This is effectively the + // same check used in HashTableLoader. + if (!MapJoinKey.isSupportedField(typeInfo)) { + supportsKeyTypes = false; + Category category = typeInfo.getCategory(); + notSupportedKeyTypes.add( + (category != Category.PRIMITIVE ? category.toString() : + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString())); + } + bigTableKeyTypeInfos[i] = typeInfo; + } + if (bigTableKeyExpressionsList.size() == 0) { + bigTableKeyExpressions = null; + } else { + bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); + } + + List bigTableExprs = desc.getExprs().get(posBigTable); + VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); + + boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED)) { + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED); + vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled); - // Currently, only under Tez and non-N-way joins. - if (isTez && desc.getConds().length == 1 && !onExpressionHasNullSafes(desc)) { + // Especially since LLAP is prone to turn it off in the MapJoinDesc in later + // physical optimizer stages... + boolean isHybridHashJoin = desc.isHybridHashJoin(); + vectorDesc.setIsHybridHashJoin(isHybridHashJoin); - // Ok, all basic restrictions satisfied so far... - specialize = true; + /* + * Populate vectorMapJoinInfo. + */ - if (!HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) { + /* + * Similarly, we need a mapping since a value expression can be a calculation and the value + * will go into a scratch column. + */ + int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; + String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; + TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length]; + ArrayList bigTableValueExpressionsList = new ArrayList(); + VectorExpression[] bigTableValueExpressions; + for (int i = 0; i < bigTableValueColumnMap.length; i++) { + VectorExpression ve = allBigTableValueExpressions[i]; + if (!IdentityExpression.isColumnOnly(ve)) { + bigTableValueExpressionsList.add(ve); + } + bigTableValueColumnMap[i] = ve.getOutputColumn(); - // We are using the optimized hash table we have further - // restrictions (using optimized and key type).
+ ExprNodeDesc exprNode = bigTableExprs.get(i); + bigTableValueColumnNames[i] = exprNode.toString(); + bigTableValueTypeInfos[i] = exprNode.getTypeInfo(); + } + if (bigTableValueExpressionsList.size() == 0) { + bigTableValueExpressions = null; + } else { + bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]); + } - if (!HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE)) { - specialize = false; - } else { - byte posBigTable = (byte) desc.getPosBigTable(); - Map> keyExprs = desc.getKeys(); - List bigTableKeyExprs = keyExprs.get(posBigTable); - for (ExprNodeDesc exprNodeDesc : bigTableKeyExprs) { - String typeName = exprNodeDesc.getTypeString(); - if (!MapJoinKey.isSupportedField(typeName)) { - specialize = false; - break; - } + vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap); + vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames); + vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos); + vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions); + + vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap); + vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames); + vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos); + vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions); + + /* + * Small table information. + */ + VectorColumnOutputMapping bigTableRetainedMapping = + new VectorColumnOutputMapping("Big Table Retained Mapping"); + + VectorColumnOutputMapping bigTableOuterKeyMapping = + new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + + // The order of the fields in the LazyBinary small table value must be used, so + // we use the source ordering flavor for the mapping. + VectorColumnSourceMapping smallTableMapping = + new VectorColumnSourceMapping("Small Table Mapping"); + + Byte[] order = desc.getTagOrder(); + Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); + boolean isOuterJoin = !desc.getNoOuterJoin(); + + /* + * Gather up big and small table output result information from the MapJoinDesc. + */ + List bigTableRetainList = desc.getRetainList().get(posBigTable); + int bigTableRetainSize = bigTableRetainList.size(); + + int[] smallTableIndices; + int smallTableIndicesSize; + List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); + if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); + smallTableIndicesSize = smallTableIndices.length; + } else { + smallTableIndices = null; + smallTableIndicesSize = 0; + } + + List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); + int smallTableRetainSize = smallTableRetainList.size(); + + int smallTableResultSize = 0; + if (smallTableIndicesSize > 0) { + smallTableResultSize = smallTableIndicesSize; + } else if (smallTableRetainSize > 0) { + smallTableResultSize = smallTableRetainSize; + } + + /* + * Determine the big table retained mapping first so we can optimize out (with + * projection) copying inner join big table keys in the subsequent small table results section. + */ + + // We use a mapping object here so we can build the projection in any order and + // get the ordered by 0 to n-1 output columns at the end. 
+ // + // Also, to avoid copying a big table key into the small table result area for inner joins, + // we reference it with the projection so there can be duplicate output columns + // in the projection. + VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); + + int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + for (int i = 0; i < bigTableRetainSize; i++) { + + // Since bigTableValueExpressions may do a calculation and produce a scratch column, we + // need to map to the right batch column. + + int retainColumn = bigTableRetainList.get(i); + int batchColumnIndex = bigTableValueColumnMap[retainColumn]; + TypeInfo typeInfo = bigTableValueTypeInfos[i]; + + // With this map we project the big table batch to make it look like an output batch. + projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo); + + // Collect columns we copy from the big table batch to the overflow batch. + if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { + // Tolerate repeated use of a big table column. + bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + } + + nextOutputColumn++; + } + + /* + * Now determine the small table results. + */ + boolean smallTableExprVectorizes = true; + + int firstSmallTableOutputColumn; + firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0); + int smallTableOutputCount = 0; + nextOutputColumn = firstSmallTableOutputColumn; + + // Small table indices has more information (i.e. keys) than retain, so use it if it exists... + String[] bigTableRetainedNames; + if (smallTableIndicesSize > 0) { + smallTableOutputCount = smallTableIndicesSize; + bigTableRetainedNames = new String[smallTableOutputCount]; + + for (int i = 0; i < smallTableIndicesSize; i++) { + if (smallTableIndices[i] >= 0) { + + // Zero and above numbers indicate a big table key is needed for + // small table result "area". + + int keyIndex = smallTableIndices[i]; + + // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we + // need to map the right column. + int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; + bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex]; + TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex]; + + if (!isOuterJoin) { + + // Optimize inner join keys of small table results. + + // Project the big table key into the small table result "area". + projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo); + + if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { + // If necessary, copy the big table key into the overflow batch's small table + // result "area". + bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo); } + } else { + + // For outer joins, since the small table key can be null when there is no match, + // we must have a physical (scratch) column for those keys. We cannot use the + // projection optimization used by inner joins above. + + int scratchColumn = vContext.allocateScratchColumn(typeInfo.getTypeName()); + projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); + + bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo); + + bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo); } } else { - // With the fast hash table implementation, we currently do not support - // Hybrid Grace Hash Join. + // Negative numbers indicate a column to be (deserialize) read from the small table's + // LazyBinary value row. 
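smallTableIndices packs two kinds of references into one int array: entries >= 0 point at big-table key positions, while negative entries encode small-table value columns as -(index + 1), decoded just below as -smallTableIndices[i] - 1. A tiny runnable demonstration of the encoding (the class is purely illustrative):

// Tiny demonstration of the index encoding used by smallTableIndices.
public class SmallTableIndexDemo {
  static int encodeValueIndex(int valueIndex) { return -valueIndex - 1; }
  static int decodeValueIndex(int encoded)    { return -encoded - 1; }

  public static void main(String[] args) {
    int[] smallTableIndices = { 2, encodeValueIndex(0), encodeValueIndex(3) };
    for (int raw : smallTableIndices) {
      if (raw >= 0) {
        System.out.println("big table key position " + raw);
      } else {
        System.out.println("small table value column " + decodeValueIndex(raw));
      }
    }
    // Prints, in order: big table key position 2,
    // small table value column 0, small table value column 3.
  }
}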
+ int smallTableValueIndex = -smallTableIndices[i] - 1; - if (desc.isHybridHashJoin()) { - specialize = false; + ExprNodeDesc smallTableExprNode = smallTableExprs.get(i); + if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) { + clearNotVectorizedReason(); + smallTableExprVectorizes = false; } + + bigTableRetainedNames[i] = smallTableExprNode.toString(); + + TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); + + // Make a new big table scratch column for the small table value. + int scratchColumn = vContext.allocateScratchColumn(typeInfo.getTypeName()); + projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); + + smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); } + nextOutputColumn++; + } + } else if (smallTableRetainSize > 0) { + smallTableOutputCount = smallTableRetainSize; + bigTableRetainedNames = new String[smallTableOutputCount]; + + // Only small table values appear in join output result. + + for (int i = 0; i < smallTableRetainSize; i++) { + int smallTableValueIndex = smallTableRetainList.get(i); + + ExprNodeDesc smallTableExprNode = smallTableExprs.get(i); + if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) { + clearNotVectorizedReason(); + smallTableExprVectorizes = false; + } + + bigTableRetainedNames[i] = smallTableExprNode.toString(); + + // Make a new big table scratch column for the small table value. + TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); + int scratchColumn = vContext.allocateScratchColumn(typeInfo.getTypeName()); + + projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); + + smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + nextOutputColumn++; + } + } else { + bigTableRetainedNames = new String[0]; + } + + // Remember the condition variables for EXPLAIN regardless. + vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled); + vectorDesc.setEngine(engine); + vectorDesc.setOneMapJoinCondition(oneMapJoinCondition); + vectorDesc.setHasNullSafes(hasNullSafes); + vectorDesc.setSupportsKeyTypes(supportsKeyTypes); + if (!supportsKeyTypes) { + vectorDesc.setNotSupportedKeyTypes(new ArrayList(notSupportedKeyTypes)); + } + vectorDesc.setIsEmptyKey(isEmptyKey); + vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes); + + // Currently, only under Tez and non-N-way joins. + if (!isVectorizationMapJoinNativeEnabled || + !isEnhanced || + !oneMapJoinCondition || + hasNullSafes || + !supportsKeyTypes || + isEmptyKey || + !smallTableExprVectorizes) { + return false; + } + + if (!isFastHashTableEnabled) { + + // No restrictions for optimized hash table. + + } else { + + // With the fast hash table implementation, we currently do not support + // Hybrid Grace Hash Join. + + if (isHybridHashJoin) { + return false; } } - return specialize; + + // Convert dynamic arrays and maps to simple arrays. + + bigTableRetainedMapping.finalize(); + + bigTableOuterKeyMapping.finalize(); + + smallTableMapping.finalize(); + + vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); + vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); + vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + + projectionMapping.finalize(); + + // Verify we added an entry for each output. 
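The assert that follows checks the projection mapping built above. Assuming isSourceSequenceGood() means "every output column 0..n-1 was assigned exactly once" (duplicate source columns are fine, duplicate or missing outputs are not), the check amounts to something like this sketch:

// Sketch of an isSourceSequenceGood()-style check; the exact semantics of the real
// method are an assumption here.
class ProjectionCheck {
  static boolean outputsAreDense(int[] outputColumns) {
    boolean[] seen = new boolean[outputColumns.length];
    for (int out : outputColumns) {
      if (out < 0 || out >= seen.length || seen[out]) {
        return false;  // gap or duplicate output position
      }
      seen[out] = true;
    }
    return true;  // exactly the positions 0..n-1, each used once
  }
}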
+ assert projectionMapping.isSourceSequenceGood(); + + vectorMapJoinInfo.setProjectionMapping(projectionMapping); + + return true; } private Operator specializeReduceSinkOperator( @@ -2302,8 +2840,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType); } - VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc(); - desc.setVectorDesc(vectorDesc); + VectorReduceSinkDesc vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc(); + vectorDesc.setReduceSinkKeyType(reduceSinkKeyType); vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo); @@ -2315,51 +2853,60 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi } private boolean canSpecializeReduceSink(ReduceSinkDesc desc, - boolean isTez, VectorizationContext vContext, + boolean isEnhanced, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException { - if (!HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED)) { - return false; - } + // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this + // operator was vectorized, but not native. And, the conditions. + VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc(); + desc.setVectorDesc(vectorDesc); - // Many restrictions. + boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED); - if (!isTez) { - return false; - } + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); - if (desc.getWriteType() == AcidUtils.Operation.UPDATE || - desc.getWriteType() == AcidUtils.Operation.DELETE) { - return false; - } + boolean acidChange = + desc.getWriteType() == AcidUtils.Operation.UPDATE || + desc.getWriteType() == AcidUtils.Operation.DELETE; - if (desc.getBucketCols() != null && !desc.getBucketCols().isEmpty()) { - return false; - } + boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty(); - boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM); - if (!useUniformHash) { - return false; - } + boolean hasTopN = desc.getTopN() >= 0; - if (desc.getTopN() >= 0) { - return false; - } + boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM); - if (desc.getDistinctColumnIndices().size() > 0) { - return false; - } + boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0; TableDesc keyTableDesc = desc.getKeySerializeInfo(); Class keySerializerClass = keyTableDesc.getDeserializerClass(); - if (keySerializerClass != org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class) { - return false; - } + boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class); TableDesc valueTableDesc = desc.getValueSerializeInfo(); Class valueDeserializerClass = valueTableDesc.getDeserializerClass(); - if (valueDeserializerClass != org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class) { + boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class); + + // Remember the condition variables for EXPLAIN regardless. 
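canSpecializeReduceSink below is restructured the same way as canSpecializeMapJoin above: instead of returning false at the first failed check, it computes every condition, stores all of them on the vector desc (so EXPLAIN can print both the met and the not-met ones), and only then applies a single combined gate. In miniature, with illustrative names:

import java.util.LinkedHashMap;
import java.util.Map;

// Minimal sketch of "compute all conditions, record them, then gate once".
class NativeGate {
  final Map<String, Boolean> conditions = new LinkedHashMap<>();  // kept for EXPLAIN

  boolean canSpecialize(boolean nativeEnabled, boolean hasTopN, boolean useUniformHash,
      boolean keyBinarySortable) {
    conditions.put("native enabled", nativeEnabled);
    conditions.put("no TopN", !hasTopN);
    conditions.put("uniform hash", useUniformHash);
    conditions.put("BinarySortable key", keyBinarySortable);
    // Single gate at the end; the recorded map explains any false result.
    return nativeEnabled && !hasTopN && useUniformHash && keyBinarySortable;
  }
}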
+ vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled); + vectorDesc.setEngine(engine); + vectorDesc.setAcidChange(acidChange); + vectorDesc.setHasBuckets(hasBuckets); + vectorDesc.setHasTopN(hasTopN); + vectorDesc.setUseUniformHash(useUniformHash); + vectorDesc.setHasDistinctColumns(hasDistinctColumns); + vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable); + vectorDesc.setIsValueLazyBinary(isValueLazyBinary); + + // Many restrictions. + if (!isVectorizationReduceSinkNativeEnabled || + !isEnhanced || + acidChange || + hasBuckets || + hasTopN || + !useUniformHash || + hasDistinctColumns || + !isKeyBinarySortable || + !isValueLazyBinary) { return false; } @@ -2429,21 +2976,103 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, return true; } - Operator vectorizeOperator(Operator op, - VectorizationContext vContext, boolean isTez) throws HiveException { + private boolean usesVectorUDFAdaptor(VectorExpression vecExpr) { + if (vecExpr == null) { + return false; + } + if (vecExpr instanceof VectorUDFAdaptor) { + return true; + } + if (usesVectorUDFAdaptor(vecExpr.getChildExpressions())) { + return true; + } + return false; + } + + private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { + if (vecExprs == null) { + return false; + } + for (VectorExpression vecExpr : vecExprs) { + if (usesVectorUDFAdaptor(vecExpr)) { + return true; + } + } + return false; + } + + public static Operator vectorizeFilterOperator( + Operator filterOp, VectorizationContext vContext) + throws HiveException { + FilterDesc filterDesc = (FilterDesc) filterOp.getConf(); + VectorFilterDesc vectorFilterDesc = new VectorFilterDesc(); + filterDesc.setVectorDesc(vectorFilterDesc); + ExprNodeDesc predicateExpr = filterDesc.getPredicate(); + VectorExpression vectorPredicateExpr = + vContext.getVectorExpression(predicateExpr, VectorExpressionDescriptor.Mode.FILTER); + vectorFilterDesc.setPredicateExpression(vectorPredicateExpr); + return OperatorFactory.getVectorOperator( + filterOp.getCompilationOpContext(), filterDesc, vContext); + } + + /* + * NOTE: The VectorGroupByDesc has already been allocated and partially populated. 
+ */ + public static Operator vectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext) + throws HiveException { + GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf(); + List keysDesc = groupByDesc.getKeys(); + VectorExpression[] vecKeyExpressions = vContext.getVectorExpressions(keysDesc); + ArrayList aggrDesc = groupByDesc.getAggregators(); + VectorAggregateExpression[] vecAggregators = new VectorAggregateExpression[aggrDesc.size()]; + for (int i = 0; i < aggrDesc.size(); ++i) { + AggregationDesc aggDesc = aggrDesc.get(i); + vecAggregators[i] = vContext.getAggregatorExpression(aggDesc); + } + VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) groupByDesc.getVectorDesc(); + vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); + vectorGroupByDesc.setAggregators(vecAggregators); + return OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), groupByDesc, vContext); + } + + public static Operator vectorizeSelectOperator( + Operator selectOp, VectorizationContext vContext) + throws HiveException { + SelectDesc selectDesc = (SelectDesc) selectOp.getConf(); + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + selectDesc.setVectorDesc(vectorSelectDesc); + List colList = selectDesc.getColList(); + VectorExpression[] vectorSelectExprs = new VectorExpression[colList.size()]; + for (int i = 0; i < colList.size(); i++) { + ExprNodeDesc expr = colList.get(i); + VectorExpression ve = vContext.getVectorExpression(expr); + vectorSelectExprs[i] = ve; + } + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + return OperatorFactory.getVectorOperator( + selectOp.getCompilationOpContext(), selectDesc, vContext); + } + + public Operator vectorizeOperator(Operator op, + VectorizationContext vContext, boolean isEnhanced, VectorTaskColumnInfo vectorTaskColumnInfo) + throws HiveException { Operator vectorOp = null; + boolean isNative; switch (op.getType()) { case MAPJOIN: { - MapJoinDesc desc = (MapJoinDesc) op.getConf(); - boolean specialize = canSpecializeMapJoin(op, desc, isTez || isSpark); - - if (!specialize) { - - Class> opClass = null; - if (op instanceof MapJoinOperator) { - + if (op instanceof MapJoinOperator) { + VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo(); + MapJoinDesc desc = (MapJoinDesc) op.getConf(); + boolean specialize = canSpecializeMapJoin(op, desc, isEnhanced, vContext, vectorMapJoinInfo); + + if (!specialize) { + + Class> opClass = null; + // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered... List bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable()); @@ -2453,20 +3082,36 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, } else { opClass = VectorMapJoinOuterFilteredOperator.class; } - } else if (op instanceof SMBMapJoinOperator) { - opClass = VectorSMBMapJoinOperator.class; + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), op.getConf(), vContext); + isNative = false; + } else { + + // TEMPORARY Until Native Vector Map Join with Hybrid passes tests... 
+ // HiveConf.setBoolVar(physicalContext.getConf(), + // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false); + + vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo); + isNative = true; + + if (vectorTaskColumnInfo != null) { + if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } } - - vectorOp = OperatorFactory.getVectorOperator( - opClass, op.getCompilationOpContext(), op.getConf(), vContext); - } else { - - // TEMPORARY Until Native Vector Map Join with Hybrid passes tests... - // HiveConf.setBoolVar(physicalContext.getConf(), - // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false); - - vectorOp = specializeMapJoinOperator(op, vContext, desc); + Preconditions.checkState(op instanceof SMBMapJoinOperator); + SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf(); + VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc(); + smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), smbJoinSinkDesc, vContext); + isNative = false; } } break; @@ -2475,38 +3120,133 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, { VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo(); ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf(); - boolean specialize = canSpecializeReduceSink(desc, isTez, vContext, vectorReduceSinkInfo); + boolean specialize = canSpecializeReduceSink(desc, isEnhanced, vContext, vectorReduceSinkInfo); if (!specialize) { vectorOp = OperatorFactory.getVectorOperator( op.getCompilationOpContext(), op.getConf(), vContext); - + isNative = false; } else { vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo); + isNative = true; + if (vectorTaskColumnInfo != null) { + if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } } } break; - case GROUPBY: case FILTER: + { + vectorOp = vectorizeFilterOperator(op, vContext); + isNative = true; + if (vectorTaskColumnInfo != null) { + VectorFilterDesc vectorFilterDesc = + (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc(); + VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression(); + if (usesVectorUDFAdaptor(vectorPredicateExpr)) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } + } + break; case SELECT: + { + vectorOp = vectorizeSelectOperator(op, vContext); + isNative = true; + if (vectorTaskColumnInfo != null) { + VectorSelectDesc vectorSelectDesc = + (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc(); + VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions(); + if (usesVectorUDFAdaptor(vectorSelectExprs)) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } + } + break; + case GROUPBY: + { + vectorOp = vectorizeGroupByOperator(op, vContext); + isNative = false; + if (vectorTaskColumnInfo != null) { + VectorGroupByDesc vectorGroupByDesc = + (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc(); + if (!vectorGroupByDesc.isVectorOutput()) { + vectorTaskColumnInfo.setGroupByVectorOutput(false); + } 
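The key-expression and aggregator scans just below reuse the recursive usesVectorUDFAdaptor helpers defined earlier in this method's file. As a minimal self-contained sketch of that traversal (StandIn is a hypothetical stand-in for Hive's VectorExpression tree and its getChildExpressions()):

    // Sketch: depth-first search for an adaptor node in an expression tree.
    final class StandIn {
      final boolean isAdaptor;    // models "instanceof VectorUDFAdaptor"
      final StandIn[] children;   // models getChildExpressions(); may be null
      StandIn(boolean isAdaptor, StandIn... children) {
        this.isAdaptor = isAdaptor;
        this.children = (children.length == 0) ? null : children;
      }
      static boolean usesAdaptor(StandIn expr) {
        if (expr == null) {
          return false;
        }
        if (expr.isAdaptor) {
          return true;
        }
        if (expr.children != null) {
          for (StandIn child : expr.children) {
            if (usesAdaptor(child)) {
              return true;
            }
          }
        }
        return false;
      }
    }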
+ VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); + if (usesVectorUDFAdaptor(vecKeyExpressions)) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + for (VectorAggregateExpression vecAggr : vecAggregators) { + if (usesVectorUDFAdaptor(vecAggr.inputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } + } + } + + } + break; case FILESINK: + { + FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf(); + VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc(); + fileSinkDesc.setVectorDesc(vectorFileSinkDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), fileSinkDesc, vContext); + isNative = false; + } + break; case LIMIT: - case EXTRACT: + { + LimitDesc limitDesc = (LimitDesc) op.getConf(); + VectorLimitDesc vectorLimitDesc = new VectorLimitDesc(); + limitDesc.setVectorDesc(vectorLimitDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), limitDesc, vContext); + isNative = true; + } + break; case EVENT: + { + AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf(); + VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc(); + eventDesc.setVectorDesc(vectorEventDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), eventDesc, vContext); + isNative = true; + } + break; case HASHTABLESINK: - vectorOp = OperatorFactory.getVectorOperator( - op.getCompilationOpContext(), op.getConf(), vContext); + { + HashTableSinkDesc hashTableSinkDesc = (HashTableSinkDesc) op.getConf(); + VectorHashTableSinkDesc vectorEventDesc = new VectorHashTableSinkDesc(); + hashTableSinkDesc.setVectorDesc(vectorEventDesc); + vectorOp = OperatorFactory.getVectorOperator( + op.getCompilationOpContext(), hashTableSinkDesc, vContext); + isNative = true; + } break; default: + isNative = (op instanceof TableScanOperator); vectorOp = op; break; } + Preconditions.checkState(vectorOp != null); + if (vectorTaskColumnInfo != null && !isNative) { + vectorTaskColumnInfo.setAllNative(false); + } - LOG.debug("vectorizeOperator " + (vectorOp == null ? "NULL" : vectorOp.getClass().getName())); - LOG.debug("vectorizeOperator " + (vectorOp == null || vectorOp.getConf() == null ? "NULL" : vectorOp.getConf().getClass().getName())); + LOG.debug("vectorizeOperator " + vectorOp.getClass().getName()); + LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName()); if (vectorOp != op) { fixupParentChildOperators(op, vectorOp); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java new file mode 100644 index 0000000..c8130a0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** + * Why a node did not vectorize. + * + */ +public class VectorizerReason { + + private static long serialVersionUID = 1L; + + public static enum VectorizerNodeIssue { + NONE, + NODE_ISSUE, + OPERATOR_ISSUE, + EXPRESSION_ISSUE + } + + private final VectorizerNodeIssue vectorizerNodeIssue; + + private final Operator operator; + + private final String expressionTitle; + + private final String issue; + + private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue, + Operator operator, String expressionTitle, String issue) { + this.vectorizerNodeIssue = vectorizerNodeIssue; + this.operator = operator; + this.expressionTitle = expressionTitle; + this.issue = issue; + } + + public static VectorizerReason createNodeIssue(String issue) { + return new VectorizerReason( + VectorizerNodeIssue.NODE_ISSUE, + null, + null, + issue); + } + + public static VectorizerReason createOperatorIssue(Operator operator, + String issue) { + return new VectorizerReason( + VectorizerNodeIssue.OPERATOR_ISSUE, + operator, + null, + issue); + } + + public static VectorizerReason createExpressionIssue(Operator operator, + String expressionTitle, String issue) { + return new VectorizerReason( + VectorizerNodeIssue.EXPRESSION_ISSUE, + operator, + expressionTitle, + issue); + } + + @Override + public VectorizerReason clone() { + return new VectorizerReason(vectorizerNodeIssue, operator, expressionTitle, issue); + } + + public VectorizerNodeIssue getVectorizerNodeIssue() { + return vectorizerNodeIssue; + } + + public Operator getOperator() { + return operator; + } + + public String getExpressionTitle() { + return expressionTitle; + } + + public String getIssue() { + return issue; + } + + @Override + public String toString() { + String reason; + switch (vectorizerNodeIssue) { + case NODE_ISSUE: + reason = issue; + break; + case OPERATOR_ISSUE: + if (operator == null) { + reason = "Unknown operator: " + issue; + } else { + reason = operator.getType() + " operator: " + issue; + } + break; + case EXPRESSION_ISSUE: + reason = expressionTitle + " expression for " + operator.getType() + " operator: " + issue; + break; + default: + reason = "Unknown " + vectorizerNodeIssue; + } + return reason; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 4a8ff15..ebd5457 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -33,6 +33,9 @@ private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; + private boolean vectorization = false; + private boolean vectorizationOnly = false; + private boolean vectorizationDetail = false; private Path explainRootPath; private Map opIdToRuntimeNumRows; @@ -98,6 +101,30 @@ public void setUserLevelExplain(boolean userLevelExplain) { this.userLevelExplain = 
userLevelExplain; } + public boolean isVectorization() { + return vectorization; + } + + public void setVectorization(boolean vectorization) { + this.vectorization = vectorization; + } + + public boolean isVectorizationOnly() { + return vectorizationOnly; + } + + public void setVectorizationOnly(boolean vectorizationOnly) { + this.vectorizationOnly = vectorizationOnly; + } + + public boolean isVectorizationDetail() { + return vectorizationDetail; + } + + public void setVectorizationDetail(boolean vectorizationDetail) { + this.vectorizationDetail = vectorizationDetail; + } + public Path getExplainRootPath() { return explainRootPath; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index e0a1d3c..55a7c48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -84,6 +84,14 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } else if (explainOptions == HiveParser.KW_ANALYZE) { config.setAnalyze(AnalyzeState.RUNNING); config.setExplainRootPath(ctx.getMRTmpPath()); + } else if (explainOptions == HiveParser.KW_VECTORIZATION) { + config.setVectorization(true); + if (ast.getFirstChildWithType(HiveParser.TOK_ONLY) != null) { + config.setVectorizationOnly(true); + } + if (ast.getFirstChildWithType(HiveParser.TOK_DETAIL) != null) { + config.setVectorizationDetail(true); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 5d3fa6a..62732da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -335,6 +335,9 @@ KW_KEY: 'KEY'; KW_ABORT: 'ABORT'; KW_EXTRACT: 'EXTRACT'; KW_FLOOR: 'FLOOR'; +KW_VECTORIZATION: 'VECTORIZATION'; +KW_SUMMARY: 'SUMMARY'; +KW_DETAIL: 'DETAIL'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index bf78545..4e5c08b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -380,6 +380,9 @@ TOK_ROLLBACK; TOK_SET_AUTOCOMMIT; TOK_CACHE_METADATA; TOK_ABORT_TRANSACTIONS; +TOK_ONLY; +TOK_DETAIL; +TOK_SUMMARY; } @@ -723,7 +726,24 @@ explainStatement explainOption @init { msgs.push("explain option"); } @after { msgs.pop(); } - : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE + : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE| + KW_VECTORIZATION vectorizationOnly? vectorizationDetail?
+ ; + +vectorizationOnly +@init { pushMsg("vectorization's only clause", state); } +@after { popMsg(state); } + : KW_ONLY + -> ^(TOK_ONLY) + ; + +vectorizationDetail +@init { pushMsg("vectorization's detail level clause", state); } +@after { popMsg(state); } + : KW_SUMMARY + -> ^(TOK_SUMMARY) + | KW_DETAIL + -> ^(TOK_DETAIL) ; execStatement diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index e217bdf..2c14203 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -29,6 +29,10 @@ public class AbstractOperatorDesc implements OperatorDesc { protected boolean vectorMode = false; + + // Extra parameters only for vectorization. + protected VectorDesc vectorDesc; + protected Statistics statistics; protected transient OpTraits opTraits; protected transient Map opProps; @@ -64,6 +68,14 @@ public void setVectorMode(boolean vm) { this.vectorMode = vm; } + public void setVectorDesc(VectorDesc vectorDesc) { + this.vectorDesc = vectorDesc; + } + + public VectorDesc getVectorDesc() { + return vectorDesc; + } + @Override public OpTraits getTraits() { return opTraits; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java index 5157ebd..4304b11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java @@ -18,10 +18,24 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.exec.Operator; + public class AbstractVectorDesc implements VectorDesc { + private static final long serialVersionUID = 1L; + + private Class vectorOpClass; + @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException("clone not supported"); } + + public void setVectorOp(Class vectorOpClass) { + this.vectorOpClass = vectorOpClass; + } + + public Class getVectorOpClass() { + return vectorOpClass; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java index 264f959..3c54bab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java @@ -19,7 +19,10 @@ package org.apache.hadoop.hive.ql.plan; import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.io.DataOutputBuffer; @@ -60,4 +63,30 @@ public void setTable(TableDesc table) { public void writeEventHeader(DataOutputBuffer buffer) throws IOException { // nothing to add } + + public class AppMasterEventOperatorExplainVectorization extends OperatorExplainVectorization { + + private final AppMasterEventDesc appMasterEventDesc; + private final VectorAppMasterEventDesc vectorAppMasterEventDesc; + + public AppMasterEventOperatorExplainVectorization(AppMasterEventDesc appMasterEventDesc, VectorDesc vectorDesc) { + // Native vectorization supported.
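The comment above marks native support, which the super call just below records. This class also debuts the null-guard accessor pattern every desc in this patch repeats: the nested *ExplainVectorization wrapper is only built when the Vectorizer attached a VectorDesc, so EXPLAIN prints nothing for non-vectorized operators. A condensed sketch of the pattern, with SomeExplainWrapper as a hypothetical name:

    // Sketch of the recurring null guard: no VectorDesc, no EXPLAIN section.
    public SomeExplainWrapper getSomeVectorization() {
      if (vectorDesc == null) {
        return null;   // ExplainTask omits @Explain methods that return null
      }
      return new SomeExplainWrapper(this, vectorDesc);
    }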
+ super(vectorDesc, true); + this.appMasterEventDesc = appMasterEventDesc; + vectorAppMasterEventDesc = (VectorAppMasterEventDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + return VectorizationCondition.getConditionsSupported(true); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "App Master Event Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public AppMasterEventOperatorExplainVectorization getAppMasterEventVectorization() { + if (vectorDesc == null) { + return null; + } + return new AppMasterEventOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 13a0811..518f2cf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -33,7 +33,9 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -65,12 +67,25 @@ public BaseWork(String name) { private String name; - // Vectorization. + /* + * Vectorization. + */ + + // This will be true if a node was examined by the Vectorizer class. + protected boolean vectorizationExamined; + + protected boolean vectorizationEnabled; protected VectorizedRowBatchCtx vectorizedRowBatchCtx; protected boolean useVectorizedInputFileFormat; + private VectorizerReason notVectorizedReason; + + private boolean groupByVectorOutput; + private boolean allNative; + private boolean usesVectorUDFAdaptor; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -163,6 +178,22 @@ public void addDummyOp(HashTableDummyOperator dummyOp) { // ----------------------------------------------------------------------------------------------- + public void setVectorizationExamined(boolean vectorizationExamined) { + this.vectorizationExamined = vectorizationExamined; + } + + public boolean getVectorizationExamined() { + return vectorizationExamined; + } + + public void setVectorizationEnabled(boolean vectorizationEnabled) { + this.vectorizationEnabled = vectorizationEnabled; + } + + public boolean getVectorizationEnabled() { + return vectorizationEnabled; + } + /* * The vectorization context for creating the VectorizedRowBatch for the node. */ @@ -174,23 +205,97 @@ public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } - /* - * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable - * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated - * vectorizing this node. - * - * When Vectorized Input File Format looks at this flag, it can determine whether it should - * operate vectorized or not. In some modes, the node can be vectorized but use row - * serialization. 
- */ - public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { - this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + public void setNotVectorizedReason(VectorizerReason notVectorizedReason) { + this.notVectorizedReason = notVectorizedReason; + } + + public VectorizerReason getNotVectorizedReason() { + return notVectorizedReason; + } + + public void setGroupByVectorOutput(boolean groupByVectorOutput) { + this.groupByVectorOutput = groupByVectorOutput; + } + + public boolean getGroupByVectorOutput() { + return groupByVectorOutput; + } + + public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { + this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; + } + + public boolean getUsesVectorUDFAdaptor() { + return usesVectorUDFAdaptor; } - public boolean getUseVectorizedInputFileFormat() { - return useVectorizedInputFileFormat; + public void setAllNative(boolean allNative) { + this.allNative = allNative; } + public boolean getAllNative() { + return allNative; + } + + public static class BaseExplainVectorization { + + private final BaseWork baseWork; + + public BaseExplainVectorization(BaseWork baseWork) { + this.baseWork = baseWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean enabled() { + return baseWork.getVectorizationEnabled(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean vectorized() { + if (!baseWork.getVectorizationEnabled()) { + return null; + } + return baseWork.getVectorMode(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String notVectorizedReason() { + if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) { + return null; + } + VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + return "Unknown"; + } + return notVectorizedReason.toString(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "groupByVectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean groupByRowOutputCascade() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getGroupByVectorOutput(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean nativeVectorized() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getAllNative(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean usesVectorUDFAdaptor() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getUsesVectorUDFAdaptor(); + } + } + + // ----------------------------------------------------------------------------------------------- /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index b0b6c3a..ad654c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -45,4 +45,9 @@ public boolean in(Level[] levels) { boolean displayOnlyOnTrue() default false; boolean skipHeader() default false; + + public enum Vectorization { + PATH, SUMMARY, DETAIL, NON_VECTORIZED; + }; + 
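The attribute declared on the next line defaults to NON_VECTORIZED, so every pre-existing @Explain use stays out of the new vectorization displays; only annotations that opt in with SUMMARY, DETAIL, or PATH surface there. A hypothetical reflection sketch of how a renderer could honor the attribute (ExplainTask's real traversal is more elaborate):

    // Sketch: print only the @Explain getters that opted into vectorization.
    static void printVectorizationFields(Object desc, java.io.PrintStream out)
        throws Exception {
      for (java.lang.reflect.Method method : desc.getClass().getMethods()) {
        if (method.getParameterTypes().length != 0) {
          continue;   // only zero-argument getters qualify
        }
        Explain note = method.getAnnotation(Explain.class);
        if (note == null
            || note.vectorization() == Explain.Vectorization.NON_VECTORIZED) {
          continue;
        }
        Object value = method.invoke(desc);
        if (value != null) {   // null means "omit this field"
          out.println(note.displayName() + ": " + value);
        }
      }
    }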
Vectorization vectorization() default Vectorization.NON_VECTORIZED; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 9f4767c..5712243 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -117,6 +117,18 @@ public boolean isFormatted() { return config.isFormatted(); } + public boolean isVectorization() { + return config.isVectorization(); + } + + public boolean isVectorizationOnly() { + return config.isVectorizationOnly(); + } + + public boolean isVectorizationDetail() { + return config.isVectorizationDetail(); + } + public ParseContext getParseContext() { return pCtx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 8ea6440..bd4214c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; @@ -30,14 +31,17 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * FetchWork. * */ -@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.DETAIL) public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; @@ -265,7 +269,8 @@ public void setLeastNumRows(int leastNumRows) { this.leastNumRows = leastNumRows; } - @Explain(displayName = "Processor Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Processor Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.DETAIL) public Operator getSource() { return source; } @@ -307,4 +312,43 @@ public String toString() { return ret; } + + // ----------------------------------------------------------------------------------------------- + + private boolean vectorizationExamined; + + public void setVectorizationExamined(boolean vectorizationExamined) { + this.vectorizationExamined = vectorizationExamined; + } + + public boolean getVectorizationExamined() { + return vectorizationExamined; + } + + public class FetchExplainVectorization { + + private final FetchWork fetchWork; + + public FetchExplainVectorization(FetchWork fetchWork) { + this.fetchWork = fetchWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean enabled() { + return false; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsNotMet() { + return VectorizationCondition.getConditionsSupported(false); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Fetch Vectorization", 
explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FetchExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new FetchExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 07ed4fd..60b9ea0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * FileSinkDesc. @@ -474,4 +474,24 @@ public void setStatsTmpDir(String statsCollectionTempDir) { this.statsTmpDir = statsCollectionTempDir; } + public class FileSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + public FileSinkOperatorExplainVectorization(VectorDesc vectorDesc) { + // Native vectorization not supported. + super(vectorDesc, false); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + return VectorizationCondition.getConditionsSupported(false); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "File Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FileSinkOperatorExplainVectorization getFileSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new FileSinkOperatorExplainVectorization(vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index fa20798..5c73d8e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; @@ -177,6 +178,7 @@ public void setSyntheticJoinPredicate(boolean syntheticJoinPredicate) { this.syntheticJoinPredicate = syntheticJoinPredicate; } + @Override public Object clone() { FilterDesc filterDesc = new FilterDesc(getPredicate().clone(), getIsSamplingPred()); @@ -186,4 +188,35 @@ public Object clone() { filterDesc.setSortedFilter(isSortedFilter()); return filterDesc; } + + public class FilterOperatorExplainVectorization extends OperatorExplainVectorization { + + private final FilterDesc filterDesc; + private final VectorFilterDesc vectorFilterDesc; + + public FilterOperatorExplainVectorization(FilterDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. 
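As with the wrappers above, the super call that follows records native support. The condition lists these wrappers expose (getConditionsSupported here, getConditionsMet and getConditionsNotMet further down) all come from the VectorizationCondition helper; a plausible minimal shape consistent with the call sites in this patch, though the real class may differ:

    import java.util.ArrayList;
    import java.util.List;

    // Sketch: a named boolean that can explain itself in EXPLAIN output.
    public class VectorizationCondition {
      private final boolean flag;
      private final String conditionName;

      public VectorizationCondition(boolean flag, String conditionName) {
        this.flag = flag;
        this.conditionName = conditionName;
      }

      public static List getConditionsMet(VectorizationCondition[] conditions) {
        List met = new ArrayList();
        for (VectorizationCondition condition : conditions) {
          if (condition.flag) {
            met.add(condition.conditionName);
          }
        }
        return met;
      }
      // getConditionsNotMet collects the false entries the same way;
      // getConditionsSupported(boolean) reports one fixed condition.
    }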
+ super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorFilterDesc = (VectorFilterDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "predicateExpression", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getPredicateExpression() { + return vectorFilterDesc.getPredicateExpression().toString(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + return VectorizationCondition.getConditionsSupported(true); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Filter Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FilterOperatorExplainVectorization getFilterVectorization() { + if (vectorDesc == null) { + return null; + } + return new FilterOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 99791e5..6aafee0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -21,11 +21,15 @@ import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; /** @@ -71,11 +75,8 @@ transient private boolean isDistinct; private boolean dontResetAggrsDistinct; - // Extra parameters only for vectorization. - private VectorGroupByDesc vectorDesc; - public GroupByDesc() { - vectorDesc = new VectorGroupByDesc(); + vectorDesc = null; } public GroupByDesc( @@ -106,7 +107,7 @@ public GroupByDesc( final boolean groupingSetsPresent, final int groupingSetsPosition, final boolean isDistinct) { - vectorDesc = new VectorGroupByDesc(); + vectorDesc = null; this.mode = mode; this.outputColumnNames = outputColumnNames; this.keys = keys; @@ -120,14 +121,6 @@ public GroupByDesc( this.isDistinct = isDistinct; } - public void setVectorDesc(VectorGroupByDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorGroupByDesc getVectorDesc() { - return vectorDesc; - } - public Mode getMode() { return mode; } @@ -311,4 +304,66 @@ public void setDistinct(boolean isDistinct) { this.isDistinct = isDistinct; } + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { + + private final GroupByDesc groupByDesc; + private final VectorGroupByDesc vectorGroupByDesc; + + public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorDesc vectorDesc) { + // Native vectorization not supported. 
+ super(vectorDesc, false); + this.groupByDesc = groupByDesc; + vectorGroupByDesc = (VectorGroupByDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeysExpression() { + return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getAggregators() { + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + List vecAggrList = new ArrayList(vecAggregators.length); + for (VectorAggregateExpression vecAggr : vecAggregators) { + vecAggrList.add(vecAggr.toString()); + } + return vecAggrList; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "vectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getGroupByRowOutputCascade() { + return vectorGroupByDesc.isVectorOutput(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getVectorOutputConditionsNotMet() { + List results = new ArrayList(); + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + for (VectorAggregateExpression vecAggr : vecAggregators) { + Category category = Vectorizer.aggregationOutputCategory(vecAggr); + if (category != ObjectInspector.Category.PRIMITIVE) { + results.add( + "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false"); + } + } + if (results.size() == 0) { + return null; + } + return results; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + return VectorizationCondition.getConditionsSupported(false); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public GroupByOperatorExplainVectorization getGroupByVectorization() { + if (vectorDesc == null) { + return null; + } + return new GroupByOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java index 94ac41e..73f04f6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -389,4 +390,30 @@ public BucketMapJoinContext getBucketMapjoinContext() { public void setBucketMapjoinContext(BucketMapJoinContext bucketMapjoinContext) { this.bucketMapjoinContext = bucketMapjoinContext; } + + public class HashTableSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final HashTableSinkDesc filterDesc; + private final VectorHashTableSinkDesc vectorHashTableSinkDesc; + + public HashTableSinkOperatorExplainVectorization(HashTableSinkDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. 
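Before the super call below marks Hash Table Sink as native, note the GROUP BY check just above: vectorOutputConditionsNotMet reduces to a single predicate over aggregator output categories. A minimal sketch, with the category array standing in for Vectorizer.aggregationOutputCategory results:

    // Sketch: GROUP BY keeps vectorized output only while every aggregate
    // yields a PRIMITIVE category; any complex output forces row mode.
    static boolean allOutputsPrimitive(ObjectInspector.Category[] categories) {
      for (ObjectInspector.Category category : categories) {
        if (category != ObjectInspector.Category.PRIMITIVE) {
          return false;
        }
      }
      return true;
    }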
+ super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorHashTableSinkDesc = (VectorHashTableSinkDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + return VectorizationCondition.getConditionsSupported(true); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Hash Table Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public HashTableSinkOperatorExplainVectorization getHashTableSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new HashTableSinkOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java index 8448a41..58727d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java @@ -17,7 +17,10 @@ */ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -73,4 +76,24 @@ public void setLeastRows(int leastRows) { this.leastRows = leastRows; } + public class LimitOperatorExplainVectorization extends OperatorExplainVectorization { + + public LimitOperatorExplainVectorization(LimitDesc limitDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + return VectorizationCondition.getConditionsSupported(true); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Limit Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public LimitOperatorExplainVectorization getLimitVectorization() { + if (vectorDesc == null) { + return null; + } + return new LimitOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index ec35860..6e32616 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -20,14 +20,21 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; /** * Map Join operator Descriptor implementation. @@ -73,17 +80,16 @@ private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; - // Extra parameters only for vectorization. 
- private VectorMapJoinDesc vectorDesc; - public MapJoinDesc() { - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; bigTableBucketNumMapping = new LinkedHashMap(); } public MapJoinDesc(MapJoinDesc clone) { super(clone); - vectorDesc = new VectorMapJoinDesc(clone.vectorDesc); + if (clone.vectorDesc != null) { + throw new RuntimeException("Clone with vectorization desc not supported"); + } this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; @@ -108,7 +114,7 @@ public MapJoinDesc(final Map> keys, final int posBigTable, final JoinCondDesc[] conds, final Map> filters, boolean noOuterJoin, String dumpFilePrefix) { super(values, outputColumnNames, noOuterJoin, conds, filters, null); - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; this.keys = keys; this.keyTblDesc = keyTblDesc; this.valueTblDescs = valueTblDescs; @@ -119,14 +125,6 @@ public MapJoinDesc(final Map> keys, initRetainExprList(); } - public void setVectorDesc(VectorMapJoinDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorMapJoinDesc getVectorDesc() { - return vectorDesc; - } - private void initRetainExprList() { retainList = new HashMap>(); Set>> set = super.getExprs().entrySet(); @@ -388,4 +386,143 @@ public boolean isDynamicPartitionHashJoin() { public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + + // Use LinkedHashSet to give predictable display order. + private static Set vectorizableMapJoinNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class MapJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final MapJoinDesc mapJoinDesc; + private final VectorMapJoinDesc vectorMapJoinDesc; + private final VectorMapJoinInfo vectorMapJoinInfo; + + private VectorizationCondition[] nativeConditions; + + public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorDesc vectorDesc) { + // VectorMapJoinOperator is not native vectorized. 
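Unlike the fixed true/false flags of the simpler descs, the super call below derives isNative from whether a native hash table implementation type was chosen. Among the native conditions built further down, one encodes an implication: the fast hash table may only be enabled when hybrid grace hash join is off. A two-line sketch of that equivalence (the ternary in createNativeConditions computes the same truth table):

    // "if fast hash table then no hybrid" is just !(fast && hybrid)
    static boolean whenFastThenNoHybrid(boolean fast, boolean hybrid) {
      return !(fast && hybrid);   // equivalent to (!fast ? true : !hybrid)
    }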
+ super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).hashTableImplementationType() != HashTableImplementationType.NONE); + this.mapJoinDesc = mapJoinDesc; + vectorMapJoinDesc = (VectorMapJoinDesc) vectorDesc; + vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorMapJoinDesc.getIsVectorizationMapJoinNativeEnabled(); + + String engine = vectorMapJoinDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); + + boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); + boolean isHybridHashJoin = vectorMapJoinDesc.getIsHybridHashJoin(); + + boolean whenFastHashTableThenNoHybrid = + (!isFastHashTableEnabled ? true : !isHybridHashJoin); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + vectorMapJoinDesc.getOneMapJoinCondition(), + "One MapJoin Condition"), + new VectorizationCondition( + !vectorMapJoinDesc.getHasNullSafes(), + "No nullsafe"), + new VectorizationCondition( + vectorMapJoinDesc.getSupportsKeyTypes(), + "Supports Key Types"), + new VectorizationCondition( + !vectorMapJoinDesc.getIsEmptyKey(), + "Not empty key"), + new VectorizationCondition( + whenFastHashTableThenNoHybrid, + "When Fast Hash Table, then requires no Hybrid Hash Join"), + new VectorizationCondition( + vectorMapJoinDesc.getSmallTableExprVectorizes(), + "Small table vectorizes"), + }; + return conditions; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeNotSupportedKeyTypes() { + return vectorMapJoinDesc.getNotSupportedKeyTypes(); + } + + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Map Join Vectorization", explainLevels = { 
Level.DEFAULT, Level.EXTENDED }) + public MapJoinOperatorExplainVectorization getMapJoinVectorization() { + if (vectorDesc == null || this instanceof SMBJoinDesc) { + return null; + } + return new MapJoinOperatorExplainVectorization(this, vectorDesc); + } + + public class SMBJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final SMBJoinDesc smbJoinDesc; + private final VectorSMBJoinDesc vectorSMBJoinDesc; + + public SMBJoinOperatorExplainVectorization(SMBJoinDesc smbJoinDesc, VectorDesc vectorDesc) { + // Native vectorization NOT supported. + super(vectorDesc, false); + this.smbJoinDesc = smbJoinDesc; + vectorSMBJoinDesc = (VectorSMBJoinDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + return VectorizationCondition.getConditionsSupported(false); + } + } + + // Handle dual nature. + @Explain(vectorization = Vectorization.DETAIL, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { + if (vectorDesc == null || !(this instanceof SMBJoinDesc)) { + return null; + } + return new SMBJoinOperatorExplainVectorization((SMBJoinDesc) this, vectorDesc); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 5cc3663..47e3a8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -26,6 +26,7 @@ import java.util.BitSet; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -47,8 +48,10 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -140,6 +143,12 @@ private VectorizedRowBatch vectorizedRowBatch; + private VectorizerReason notEnabledInputFileFormatReason; + + private Set vectorizationInputFileFormatClassNameSet; + private List vectorizationEnabledConditionsMet; + private List vectorizationEnabledConditionsNotMet; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -459,7 +468,8 @@ public void replaceRoots(Map, Operator> replacementMap) { } @Override - @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.DETAIL) public Set> getAllRootOperators() { Set> opSet = new LinkedHashSet>(); @@ -716,4 +726,86 @@ public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) { public VectorizedRowBatch getVectorizedRowBatch() { return 
vectorizedRowBatch; } + + /* + * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable + * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated + * vectorizing this node. + * + * When Vectorized Input File Format looks at this flag, it can determine whether it should + * operate vectorized or not. In some modes, the node can be vectorized but use row + * serialization. + */ + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + + public void setNotEnabledInputFileFormatReason(VectorizerReason notEnabledInputFileFormatReason) { + this.notEnabledInputFileFormatReason = notEnabledInputFileFormatReason; + } + + public VectorizerReason getNotEnabledInputFileFormatReason() { + return notEnabledInputFileFormatReason; + } + + public void setVectorizationInputFileFormatClassNameSet(Set vectorizationInputFileFormatClassNameSet) { + this.vectorizationInputFileFormatClassNameSet = vectorizationInputFileFormatClassNameSet; + } + + public Set getVectorizationInputFileFormatClassNameSet() { + return vectorizationInputFileFormatClassNameSet; + } + + public void setVectorizationEnabledConditionsMet(ArrayList vectorizationEnabledConditionsMet) { + this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true); + } + + public List getVectorizationEnabledConditionsMet() { + return vectorizationEnabledConditionsMet; + } + + public void setVectorizationEnabledConditionsNotMet(List vectorizationEnabledConditionsNotMet) { + this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false); + } + + public List getVectorizationEnabledConditionsNotMet() { + return vectorizationEnabledConditionsNotMet; + } + + public class MapExplainVectorization extends BaseExplainVectorization { + + private final MapWork mapWork; + + public MapExplainVectorization(MapWork mapWork) { + super(mapWork); + this.mapWork = mapWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "inputFileFormats", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Set inputFileFormats() { + return mapWork.getVectorizationInputFileFormatClassNameSet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsMet() { + return mapWork.getVectorizationEnabledConditionsMet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List enabledConditionsNotMet() { + return mapWork.getVectorizationEnabledConditionsNotMet(); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Map Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MapExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java index 82143a6..c5e93b1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java @@ -32,13 +32,15 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredLocalWork. * */ -@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.PATH) public class MapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index aa7f6ed..28e4c89 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -24,14 +24,15 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredWork. * */ -@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.PATH) public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java new file mode 100644 index 0000000..db77c09 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; + +public class OperatorExplainVectorization { + + protected final VectorDesc vectorDesc; + + protected final boolean isNative; + + public OperatorExplainVectorization(VectorDesc vectorDesc, boolean isNative) { + this.vectorDesc = vectorDesc; + this.isNative = isNative; + } + + public List vectorExpressionsToStringList(VectorExpression[] vectorExpressions) { + if (vectorExpressions == null) { + return null; + } + List vecExprList = new ArrayList(vectorExpressions.length); + for (VectorExpression vecExpr : vectorExpressions) { + vecExprList.add(vecExpr.toString()); + } + return vecExprList; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getVectorized() { + return true; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "className", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getClassName() { + return vectorDesc.getVectorOpClass().getSimpleName(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "native", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean getNative() { + if (!getVectorized()) { + return null; + } + return isNative; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index d7e404c..e0fbcd2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -19,11 +19,18 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.EnumSet; +import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,9 +128,6 @@ private ReducerTraits(int trait) { private static transient Logger LOG = LoggerFactory.getLogger(ReduceSinkDesc.class); - // Extra parameters only for vectorization. - private VectorReduceSinkDesc vectorDesc; - public ReduceSinkDesc() { } @@ -187,14 +191,6 @@ public Object clone() { return desc; } - public void setVectorDesc(VectorReduceSinkDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorReduceSinkDesc getVectorDesc() { - return vectorDesc; - } - public java.util.ArrayList getOutputKeyColumnNames() { return outputKeyColumnNames; } @@ -490,4 +486,105 @@ public void setHasOrderBy(boolean hasOrderBy) { this.hasOrderBy = hasOrderBy; } + // Use LinkedHashSet to give predictable display order. 
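The set declared on the next lines implements that ordering note: LinkedHashSet preserves insertion order, so the engine condition name renders as a stable "[tez, spark]", and membership gates native ReduceSink on the execution engine. A runnable sketch of both properties:

    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.Set;

    // Sketch: insertion-ordered engine gate, as used by the native checks.
    public class EngineGateDemo {
      public static void main(String[] args) {
        Set engines = new LinkedHashSet(Arrays.asList("tez", "spark"));
        System.out.println(engines.contains("mr"));   // false: mr stays row-mode
        System.out.println("IN " + engines);          // IN [tez, spark], stable order
      }
    }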
+ private static Set vectorizableReduceSinkNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class ReduceSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final ReduceSinkDesc reduceSinkDesc; + private final VectorReduceSinkDesc vectorReduceSinkDesc; + private final VectorReduceSinkInfo vectorReduceSinkInfo; + + private VectorizationCondition[] nativeConditions; + + public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, VectorDesc vectorDesc) { + // VectorReduceSinkOperator is not native vectorized. + super(vectorDesc, ((VectorReduceSinkDesc) vectorDesc).reduceSinkKeyType()!= ReduceSinkKeyType.NONE); + this.reduceSinkDesc = reduceSinkDesc; + vectorReduceSinkDesc = (VectorReduceSinkDesc) vectorDesc; + vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeyExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkKeyExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "valueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getValueExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorReduceSinkDesc.getIsVectorizationReduceSinkNativeEnabled(); + + String engine = vectorReduceSinkDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines; + boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + !vectorReduceSinkDesc.getAcidChange(), + "Not ACID UPDATE or DELETE"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasBuckets(), + "No buckets"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasTopN(), + "No TopN"), + new VectorizationCondition( + vectorReduceSinkDesc.getUseUniformHash(), + "Uniform Hash"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasDistinctColumns(), + "No DISTINCT columns"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsKeyBinarySortable(), + "BinarySortableSerDe for keys"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsValueLazyBinary(), + "LazyBinarySerDe for values") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return 
VectorizationCondition.getConditionsNotMet(nativeConditions); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new ReduceSinkOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index 72fc4ca..43ab801 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -19,17 +19,23 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -89,6 +95,9 @@ public ReduceWork(String name) { private ObjectInspector keyObjectInspector = null; private ObjectInspector valueObjectInspector = null; + private boolean reduceVectorizationEnabled; + private String vectorReduceEngine; + /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -160,7 +169,8 @@ public String getExecutionMode() { return null; } - @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.DETAIL) public Operator getReducer() { return reducer; } @@ -252,4 +262,81 @@ public int getMaxReduceTasks() { public void setMaxReduceTasks(int maxReduceTasks) { this.maxReduceTasks = maxReduceTasks; } + + public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) { + this.reduceVectorizationEnabled = reduceVectorizationEnabled; + } + + public boolean getReduceVectorizationEnabled() { + return reduceVectorizationEnabled; + } + + public void setVectorReduceEngine(String vectorReduceEngine) { + this.vectorReduceEngine = vectorReduceEngine; + } + + public String getVectorReduceEngine() { + return vectorReduceEngine; + } + + // Use LinkedHashSet to give predictable display order. 
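To make the two enable conditions concrete, here is a standalone sketch of how they are evaluated and partitioned into the met/not-met lists (it relies on the VectorizationCondition class added later in this patch; the "mr" engine value is just an example, and in ReduceWork the flags come from the setters above rather than literals):

    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;
    import org.apache.hadoop.hive.ql.plan.VectorizationCondition;

    public class ReduceVectorizationConditionsDemo {
      public static void main(String[] args) {
        // Use LinkedHashSet to give predictable display order.
        Set<String> supportedEngines = new LinkedHashSet<String>(Arrays.asList("tez", "spark"));
        String engine = "mr"; // normally taken from hive.execution.engine
        VectorizationCondition[] conditions = new VectorizationCondition[] {
            new VectorizationCondition(true, "hive.vectorized.execution.reduce.enabled"),
            new VectorizationCondition(supportedEngines.contains(engine),
                "hive.execution.engine " + engine + " IN " + supportedEngines)
        };
        // EXPLAIN renders these as enableConditionsMet / enableConditionsNotMet.
        List<String> met = VectorizationCondition.getConditionsMet(conditions);
        List<String> notMet = VectorizationCondition.getConditionsNotMet(conditions);
        System.out.println(met);    // [hive.vectorized.execution.reduce.enabled IS true]
        System.out.println(notMet); // [hive.execution.engine mr IN [tez, spark] IS false]
      }
    }

The ReduceWork additions continue below.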
+ private static Set reduceVectorizableEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + + public class ReduceExplainVectorization extends BaseExplainVectorization { + + private final ReduceWork reduceWork; + + private VectorizationCondition[] reduceVectorizationConditions; + + public ReduceExplainVectorization(ReduceWork reduceWork) { + super(reduceWork); + this.reduceWork = reduceWork; + } + + private VectorizationCondition[] createReduceExplainVectorizationConditions() { + + boolean enabled = reduceWork.getReduceVectorizationEnabled(); + + String engine = reduceWork.getVectorReduceEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + reduceVectorizableEngines; + + boolean engineInSupported = reduceVectorizableEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName) + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(reduceVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new ReduceExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java index 67a8327..8c3649b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java @@ -20,7 +20,9 @@ import java.util.ArrayList; import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -135,4 +137,36 @@ public boolean isSelStarNoCompute() { public void setSelStarNoCompute(boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; } + + + public class SelectOperatorExplainVectorization extends OperatorExplainVectorization { + + private final SelectDesc selectDesc; + private final VectorSelectDesc vectorSelectDesc; + + public SelectOperatorExplainVectorization(SelectDesc selectDesc, VectorDesc vectorDesc) { + // Native vectorization supported. 
+ super(vectorDesc, true); + this.selectDesc = selectDesc; + vectorSelectDesc = (VectorSelectDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "selectExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSelectExpressions() { + return vectorExpressionsToStringList(vectorSelectDesc.getSelectExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + return VectorizationCondition.getConditionsSupported(true); + } + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "Select Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SelectOperatorExplainVectorization getSelectVectorization() { + if (vectorDesc == null) { + return null; + } + return new SelectOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java index 7a70e6b..c1973b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * TezWork. This class encapsulates all the work objects that can be executed @@ -49,7 +49,8 @@ * */ @SuppressWarnings("serial") -@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.PATH) public class TezWork extends AbstractOperatorDesc { public enum VertexType { @@ -107,7 +108,8 @@ public String getDagId() { /** * getWorkMap returns a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.PATH) public Map getWorkMap() { Map result = new LinkedHashMap(); for (BaseWork w: getAllWork()) { @@ -306,7 +308,8 @@ public int compareTo(Dependency o) { } } - @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.PATH) public Map> getDependencyMap() { Map> result = new LinkedHashMap>(); for (Map.Entry> entry: invertedWorkGraph.entrySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java new file mode 100644 index 0000000..2e11321 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorAppMasterEventDesc. + * + * Extra parameters beyond AppMasterEventDesc just for the VectorAppMasterEventOperator. + * + * We don't extend AppMasterEventDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorAppMasterEventDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorAppMasterEventDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java index 3a2efdb..078408c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java @@ -22,4 +22,9 @@ public interface VectorDesc extends Serializable, Cloneable { public Object clone() throws CloneNotSupportedException; + + public void setVectorOp(Class vectorOpClass); + + public Class getVectorOpClass(); + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java new file mode 100644 index 0000000..325ac91 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFileSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorFileSinkDesc. + * + * Extra parameters beyond FileSinkDesc just for the VectorFileSinkOperator. + * + * We don't extend FileSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorFileSinkDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorFileSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java new file mode 100644 index 0000000..6feed84 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorFilterDesc.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * VectorFilterDesc. + * + * Extra parameters beyond FilterDesc just for the VectorFilterOperator. + * + * We don't extend FilterDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorFilterDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private VectorExpression predicateExpression; + + public VectorFilterDesc() { + } + + public void setPredicateExpression(VectorExpression predicateExpression) { + this.predicateExpression = predicateExpression; + } + + public VectorExpression getPredicateExpression() { + return predicateExpression; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index 08f8ebf..fab1090 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; + /** * VectorGroupByDesc. * @@ -59,6 +62,9 @@ private boolean isVectorOutput; + private VectorExpression[] keyExpressions; + private VectorAggregateExpression[] aggregators; + public VectorGroupByDesc() { this.processingMode = ProcessingMode.NONE; this.isVectorOutput = false; @@ -79,6 +85,22 @@ public void setVectorOutput(boolean isVectorOutput) { this.isVectorOutput = isVectorOutput; } + public void setKeyExpressions(VectorExpression[] keyExpressions) { + this.keyExpressions = keyExpressions; + } + + public VectorExpression[] getKeyExpressions() { + return keyExpressions; + } + + public void setAggregators(VectorAggregateExpression[] aggregators) { + this.aggregators = aggregators; + } + + public VectorAggregateExpression[] getAggregators() { + return aggregators; + } + /** * Which ProcessingMode for VectorGroupByOperator? * diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorHashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorHashTableSinkDesc.java new file mode 100644 index 0000000..392ffad --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorHashTableSinkDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorHashTableSinkDesc. + * + * Extra parameters beyond HashTableSinkDesc just for the VectorHashTableSinkOperator. + * + * We don't extend HashTableSinkDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorHashTableSinkDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorHashTableSinkDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java new file mode 100644 index 0000000..c9bc45a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorLimitDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorLimitDesc. + * + * Extra parameters beyond LimitDesc just for the VectorLimitOperator. + * + * We don't extend LimitDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorLimitDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorLimitDesc() { + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 8ea230f..1252140 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -18,9 +18,13 @@ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.base.Preconditions; + /** * VectorMapJoinDesc.
* @@ -79,23 +83,38 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { } } + public static enum OperatorVariation { + NONE, + INNER_BIG_ONLY, + INNER, + LEFT_SEMI, + OUTER + } + private HashTableImplementationType hashTableImplementationType; private HashTableKind hashTableKind; private HashTableKeyType hashTableKeyType; + private OperatorVariation operatorVariation; private boolean minMaxEnabled; + private VectorMapJoinInfo vectorMapJoinInfo; + public VectorMapJoinDesc() { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; + operatorVariation = OperatorVariation.NONE; minMaxEnabled = false; + vectorMapJoinInfo = null; } public VectorMapJoinDesc(VectorMapJoinDesc clone) { this.hashTableImplementationType = clone.hashTableImplementationType; this.hashTableKind = clone.hashTableKind; this.hashTableKeyType = clone.hashTableKeyType; + this.operatorVariation = clone.operatorVariation; this.minMaxEnabled = clone.minMaxEnabled; + this.vectorMapJoinInfo = clone.vectorMapJoinInfo; } public HashTableImplementationType hashTableImplementationType() { @@ -122,6 +141,14 @@ public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { this.hashTableKeyType = hashTableKeyType; } + public OperatorVariation operatorVariation() { + return operatorVariation; + } + + public void setOperatorVariation(OperatorVariation operatorVariation) { + this.operatorVariation = operatorVariation; + } + public boolean minMaxEnabled() { return minMaxEnabled; } @@ -129,4 +156,87 @@ public boolean minMaxEnabled() { public void setMinMaxEnabled(boolean minMaxEnabled) { this.minMaxEnabled = minMaxEnabled; } + + public void setVectorMapJoinInfo(VectorMapJoinInfo vectorMapJoinInfo) { + Preconditions.checkState(vectorMapJoinInfo != null); + this.vectorMapJoinInfo = vectorMapJoinInfo; + } + + public VectorMapJoinInfo getVectorMapJoinInfo() { + return vectorMapJoinInfo; + } + + private boolean isVectorizationMapJoinNativeEnabled; + private String engine; + private boolean oneMapJoinCondition; + private boolean hasNullSafes; + private boolean isFastHashTableEnabled; + private boolean isHybridHashJoin; + private boolean supportsKeyTypes; + private List notSupportedKeyTypes; + private boolean isEmptyKey; + private boolean smallTableExprVectorizes; + + public void setIsVectorizationMapJoinNativeEnabled(boolean isVectorizationMapJoinNativeEnabled) { + this.isVectorizationMapJoinNativeEnabled = isVectorizationMapJoinNativeEnabled; + } + public boolean getIsVectorizationMapJoinNativeEnabled() { + return isVectorizationMapJoinNativeEnabled; + } + public void setEngine(String engine) { + this.engine = engine; + } + public String getEngine() { + return engine; + } + public void setOneMapJoinCondition(boolean oneMapJoinCondition) { + this.oneMapJoinCondition = oneMapJoinCondition; + } + public boolean getOneMapJoinCondition() { + return oneMapJoinCondition; + } + public void setHasNullSafes(boolean hasNullSafes) { + this.hasNullSafes = hasNullSafes; + } + public boolean getHasNullSafes() { + return hasNullSafes; + } + public void setSupportsKeyTypes(boolean supportsKeyTypes) { + this.supportsKeyTypes = supportsKeyTypes; + } + public boolean getSupportsKeyTypes() { + return supportsKeyTypes; + } + public void setNotSupportedKeyTypes(List notSupportedKeyTypes) { + this.notSupportedKeyTypes = notSupportedKeyTypes; + } + public List getNotSupportedKeyTypes() { + return notSupportedKeyTypes; + } + public void setIsEmptyKey(boolean isEmptyKey) { + 
this.isEmptyKey = isEmptyKey; + } + public boolean getIsEmptyKey() { + return isEmptyKey; + } + public void setSmallTableExprVectorizes(boolean smallTableExprVectorizes) { + this.smallTableExprVectorizes = smallTableExprVectorizes; + } + public boolean getSmallTableExprVectorizes() { + return smallTableExprVectorizes; + } + + public void setIsFastHashTableEnabled(boolean isFastHashTableEnabled) { + this.isFastHashTableEnabled = isFastHashTableEnabled; + } + public boolean getIsFastHashTableEnabled() { + return isFastHashTableEnabled; + } + public void setIsHybridHashJoin(boolean isHybridHashJoin) { + this.isHybridHashJoin = isHybridHashJoin; + } + public boolean getIsHybridHashJoin() { + return isHybridHashJoin; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java new file mode 100644 index 0000000..2cf2e72 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * VectorMapJoinInfo. + * + * A convenience data structure that has information needed to vectorize map join. + * + * It is created by the Vectorizer when it is determining whether it can specialize so the + * information doesn't have to be recreated again and again by the VectorMapJoinOperator's + * constructors and later during execution. 
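As a concrete sketch of that wiring (illustrative only — the column indices, names, and types below are invented, not taken from the patch), the Vectorizer fills in the class defined just below roughly like this:

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class VectorMapJoinInfoSketch {
      public static VectorMapJoinInfo example() {
        VectorMapJoinInfo info = new VectorMapJoinInfo();
        // Big-table join key: batch column 2, named "key", of type bigint.
        info.setBigTableKeyColumnMap(new int[] {2});
        info.setBigTableKeyColumnNames(new String[] {"key"});
        info.setBigTableKeyTypeInfos(new TypeInfo[] {TypeInfoFactory.longTypeInfo});
        // Big-table value columns carried to the join output: columns 0 and 1.
        info.setBigTableValueColumnMap(new int[] {0, 1});
        info.setBigTableValueColumnNames(new String[] {"value", "ds"});
        info.setBigTableValueTypeInfos(new TypeInfo[] {
            TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo});
        return info;
      }
    }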
+ */ +public class VectorMapJoinInfo { + + private static long serialVersionUID = 1L; + + private int[] bigTableKeyColumnMap; + private String[] bigTableKeyColumnNames; + private TypeInfo[] bigTableKeyTypeInfos; + private VectorExpression[] bigTableKeyExpressions; + + private int[] bigTableValueColumnMap; + private String[] bigTableValueColumnNames; + private TypeInfo[] bigTableValueTypeInfos; + private VectorExpression[] bigTableValueExpressions; + + private VectorColumnOutputMapping bigTableRetainedMapping; + private VectorColumnOutputMapping bigTableOuterKeyMapping; + private VectorColumnSourceMapping smallTableMapping; + + private VectorColumnSourceMapping projectionMapping; + + public VectorMapJoinInfo() { + bigTableKeyColumnMap = null; + bigTableKeyColumnNames = null; + bigTableKeyTypeInfos = null; + bigTableKeyExpressions = null; + + bigTableValueColumnMap = null; + bigTableValueColumnNames = null; + bigTableValueTypeInfos = null; + bigTableValueExpressions = null; + + bigTableRetainedMapping = null; + bigTableOuterKeyMapping = null; + smallTableMapping = null; + + projectionMapping = null; + } + + public int[] getBigTableKeyColumnMap() { + return bigTableKeyColumnMap; + } + + public void setBigTableKeyColumnMap(int[] bigTableKeyColumnMap) { + this.bigTableKeyColumnMap = bigTableKeyColumnMap; + } + + public String[] getBigTableKeyColumnNames() { + return bigTableKeyColumnNames; + } + + public void setBigTableKeyColumnNames(String[] bigTableKeyColumnNames) { + this.bigTableKeyColumnNames = bigTableKeyColumnNames; + } + + public TypeInfo[] getBigTableKeyTypeInfos() { + return bigTableKeyTypeInfos; + } + + public void setBigTableKeyTypeInfos(TypeInfo[] bigTableKeyTypeInfos) { + this.bigTableKeyTypeInfos = bigTableKeyTypeInfos; + } + + public VectorExpression[] getBigTableKeyExpressions() { + return bigTableKeyExpressions; + } + + public void setBigTableKeyExpressions(VectorExpression[] bigTableKeyExpressions) { + this.bigTableKeyExpressions = bigTableKeyExpressions; + } + + + public int[] getBigTableValueColumnMap() { + return bigTableValueColumnMap; + } + + public void setBigTableValueColumnMap(int[] bigTableValueColumnMap) { + this.bigTableValueColumnMap = bigTableValueColumnMap; + } + + public String[] getBigTableValueColumnNames() { + return bigTableValueColumnNames; + } + + public void setBigTableValueColumnNames(String[] bigTableValueColumnNames) { + this.bigTableValueColumnNames = bigTableValueColumnNames; + } + + public TypeInfo[] getBigTableValueTypeInfos() { + return bigTableValueTypeInfos; + } + + public void setBigTableValueTypeInfos(TypeInfo[] bigTableValueTypeInfos) { + this.bigTableValueTypeInfos = bigTableValueTypeInfos; + } + + public VectorExpression[] getBigTableValueExpressions() { + return bigTableValueExpressions; + } + + public void setBigTableValueExpressions(VectorExpression[] bigTableValueExpressions) { + this.bigTableValueExpressions = bigTableValueExpressions; + } + + public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { + this.bigTableRetainedMapping = bigTableRetainedMapping; + } + + public VectorColumnOutputMapping getBigTableRetainedMapping() { + return bigTableRetainedMapping; + } + + public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) { + this.bigTableOuterKeyMapping = bigTableOuterKeyMapping; + } + + public VectorColumnOutputMapping getBigTableOuterKeyMapping() { + return bigTableOuterKeyMapping; + } + + public void setSmallTableMapping(VectorColumnSourceMapping 
smallTableMapping) { + this.smallTableMapping = smallTableMapping; + } + + public VectorColumnSourceMapping getSmallTableMapping() { + return smallTableMapping; + } + + public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) { + this.projectionMapping = projectionMapping; + } + + public VectorColumnSourceMapping getProjectionMapping() { + return projectionMapping; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java index c56bff6..288a440 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java @@ -61,4 +61,72 @@ public void setVectorReduceSinkInfo(VectorReduceSinkInfo vectorReduceSinkInfo) { public VectorReduceSinkInfo getVectorReduceSinkInfo() { return vectorReduceSinkInfo; } + + private boolean isVectorizationReduceSinkNativeEnabled; + private String engine; + private boolean acidChange; + private boolean hasBuckets; + private boolean hasTopN; + private boolean useUniformHash; + private boolean hasDistinctColumns; + private boolean isKeyBinarySortable; + private boolean isValueLazyBinary; + + /* + * The following conditions are for native Vector ReduceSink. + */ + public void setIsVectorizationReduceSinkNativeEnabled(boolean isVectorizationReduceSinkNativeEnabled) { + this.isVectorizationReduceSinkNativeEnabled = isVectorizationReduceSinkNativeEnabled; + } + public boolean getIsVectorizationReduceSinkNativeEnabled() { + return isVectorizationReduceSinkNativeEnabled; + } + public void setEngine(String engine) { + this.engine = engine; + } + public String getEngine() { + return engine; + } + public void setAcidChange(boolean acidChange) { + this.acidChange = acidChange; + } + public boolean getAcidChange() { + return acidChange; + } + public void setHasBuckets(boolean hasBuckets) { + this.hasBuckets = hasBuckets; + } + public boolean getHasBuckets() { + return hasBuckets; + } + public void setHasTopN(boolean hasTopN) { + this.hasTopN = hasTopN; + } + public boolean getHasTopN() { + return hasTopN; + } + public void setUseUniformHash(boolean useUniformHash) { + this.useUniformHash = useUniformHash; + } + public boolean getUseUniformHash() { + return useUniformHash; + } + public void setHasDistinctColumns(boolean hasDistinctColumns) { + this.hasDistinctColumns = hasDistinctColumns; + } + public boolean getHasDistinctColumns() { + return hasDistinctColumns; + } + public void setIsKeyBinarySortable(boolean isKeyBinarySortable) { + this.isKeyBinarySortable = isKeyBinarySortable; + } + public boolean getIsKeyBinarySortable() { + return isKeyBinarySortable; + } + public void setIsValueLazyBinary(boolean isValueLazyBinary) { + this.isValueLazyBinary = isValueLazyBinary; + } + public boolean getIsValueLazyBinary() { + return isValueLazyBinary; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java new file mode 100644 index 0000000..ab578cd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSMBJoinDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorSMBJoinDesc. + * + * Extra parameters beyond SMBMapJoinDesc just for the VectorSMBMapJoinOperator. + * + * We don't extend SMBMapJoinDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSMBJoinDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public VectorSMBJoinDesc() { + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java new file mode 100644 index 0000000..2ecc4d8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorSelectDesc.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * VectorSelectDesc. + * + * Extra parameters beyond SelectDesc just for the VectorSelectOperator. + * + * We don't extend SelectDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain. + */ +public class VectorSelectDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private VectorExpression[] selectExpressions; + + public VectorSelectDesc() { + } + + public void setSelectExpressions(VectorExpression[] selectExpressions) { + this.selectExpressions = selectExpressions; + } + + public VectorExpression[] getSelectExpressions() { + return selectExpressions; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java new file mode 100644 index 0000000..32b62e8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorizationCondition.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class VectorizationCondition { + + private final boolean flag; + private final String conditionName; + + public VectorizationCondition(boolean flag, String conditionName) { + this.flag = flag; + this.conditionName = conditionName; + } + + public boolean getFlag() { + return flag; + } + + public String getConditionName() { + return conditionName; + } + + public static List getConditionsMet(VectorizationCondition[] conditions) { + List metList = new ArrayList(); + for (VectorizationCondition condition : conditions) { + if (condition.getFlag()) { + metList.add(condition.getConditionName() + " IS true"); + } + } + return metList; + } + + public static List getConditionsNotMet(VectorizationCondition[] conditions) { + List notMetList = new ArrayList(); + for (VectorizationCondition condition : conditions) { + if (!condition.getFlag()) { + notMetList.add(condition.getConditionName() + " IS false"); + } + } + return notMetList; + } + + public static List addBooleans(List conditions, boolean flag) { + ArrayList result = new ArrayList(conditions.size()); + for (String condition : conditions) { + result.add(condition + " IS " + flag); + } + return result; + } + + // Helper method. 
+ public static List getConditionsSupported(boolean isSupported) { + return Arrays.asList("Supported IS " + isSupported); + } + +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index d3bb84d..22b845d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -25,13 +25,19 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; import org.junit.Test; /** @@ -89,10 +95,15 @@ private VectorFilterOperator getAVectorFilterOperator() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); List columns = new ArrayList(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext("name", columns); FilterDesc fdesc = new FilterDesc(); fdesc.setPredicate(col1Expr); - return new VectorFilterOperator(new CompilationOpContext(), vc, fdesc); + + Operator filterOp = + OperatorFactory.get(new CompilationOpContext(), fdesc); + + VectorizationContext vc = new VectorizationContext("name", columns); + + return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(filterOp, vc); } @Test diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index f5b5d9d..086c0b7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -39,16 +39,20 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromConcat; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromLongIterables; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromRepeats; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import 
org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -128,9 +132,11 @@ private static GroupByDesc buildGroupByDescType( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); - desc.getVectorDesc().setProcessingMode(ProcessingMode.GLOBAL); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.GLOBAL); return desc; } @@ -146,6 +152,8 @@ private static GroupByDesc buildGroupByDescCountStar( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); @@ -162,7 +170,7 @@ private static GroupByDesc buildKeyGroupByDesc( TypeInfo keyTypeInfo) { GroupByDesc desc = buildGroupByDescType(ctx, aggregate, GenericUDAFEvaluator.Mode.PARTIAL1, column, dataTypeInfo); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, key, keyTypeInfo); ArrayList keys = new ArrayList(); @@ -196,7 +204,11 @@ public void testMemoryPressureFlush() throws HiveException { desc.setMemoryThreshold(treshold); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -1735,13 +1747,19 @@ private void testMultiKey( } GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); desc.setKeys(keysDesc); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -1846,9 +1864,11 @@ private void testKeyTypeAggregate( outputColumnNames.add("_col1"); GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0])); @@ -1857,7 +1877,11 @@ private void testKeyTypeAggregate( desc.setKeys(keysDesc); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, 
desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2252,10 +2276,14 @@ public void testAggregateCountStarIterable ( VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescCountStar (ctx); - desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2283,10 +2311,14 @@ public void testAggregateCountReduceIterable ( VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, "count", GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo); - VectorGroupByDesc vectorDesc = desc.getVectorDesc(); + VectorGroupByDesc vectorDesc = (VectorGroupByDesc) desc.getVectorDesc(); vectorDesc.setProcessingMode(ProcessingMode.GLOBAL); // Use GLOBAL when no key for Reduce. CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2318,7 +2350,11 @@ public void testAggregateStringIterable ( TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2350,7 +2386,11 @@ public void testAggregateDecimalIterable ( buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2383,7 +2423,11 @@ public void testAggregateDoubleIterable ( TypeInfoFactory.doubleTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); 
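The same mechanical refactor repeats across the hunks in this test: instead of invoking the vectorized operator's constructor directly, each test now builds the row-mode operator and asks the Vectorizer for its vectorized counterpart, mirroring what the planner does in production. Distilled (a sketch assuming a prepared GroupByDesc desc and VectorizationContext ctx, as in the surrounding code):

    // Before: the vectorized operator was constructed directly.
    //   VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc);
    // After: build the row-mode operator, then vectorize it.
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<GroupByDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);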
vgo.initialize(hconf, null); @@ -2414,7 +2458,11 @@ public void testAggregateLongIterable ( GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(null, null); @@ -2449,7 +2497,11 @@ public void testAggregateLongKeyIterable ( TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); @@ -2515,7 +2567,11 @@ public void testAggregateStringKeyIterable ( dataTypeInfo, "Key", TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); - VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); + + Operator groupByOp = OperatorFactory.get(cCtx, desc); + + VectorGroupByOperator vgo = + (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo); vgo.initialize(hconf, null); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 779177a..4c327e6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -51,6 +53,7 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, VectorizationContext ctxt, OperatorDesc conf) throws HiveException { super(ctx, ctxt, conf); + initializeOp(null); } @@ -115,6 +118,18 @@ public void testSelectOperator() throws HiveException { outputColNames.add("_col1"); selDesc.setOutputColumnNames(outputColNames); + // CONSIDER unwinding ValidatorVectorSelectOperator as a subclass of VectorSelectOperator. 
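In distilled form, the wiring performed by the hunk below hands precompiled projection expressions to the operator through its desc, since VectorSelectOperator now reads them from VectorSelectDesc instead of compiling them itself (sketch; selDesc and vc come from the surrounding test, and getVectorExpression can throw HiveException):

    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    selDesc.setVectorDesc(vectorSelectDesc);
    List<ExprNodeDesc> colList = selDesc.getColList();
    VectorExpression[] selectExprs = new VectorExpression[colList.size()];
    for (int i = 0; i < colList.size(); i++) {
      // Compile each row-mode expression into its vectorized form.
      selectExprs[i] = vc.getVectorExpression(colList.get(i));
    }
    vectorSelectDesc.setSelectExpressions(selectExprs);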
+ VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + selDesc.setVectorDesc(vectorSelectDesc); + List selectColList = selDesc.getColList(); + VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()]; + for (int i = 0; i < selectColList.size(); i++) { + ExprNodeDesc expr = selectColList.get(i); + VectorExpression ve = vc.getVectorExpression(expr); + vectorSelectExprs[i] = ve; + } + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + ValidatorVectorSelectOperator vso = new ValidatorVectorSelectOperator( new CompilationOpContext(), vc, selDesc); diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 3295372..c84b84f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -112,7 +112,7 @@ public void testAggregateOnUDF() throws HiveException { Vectorizer v = new Vectorizer(); Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false)); - VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext, false); + VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext, false, null); Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass()); VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0]; Assert.assertEquals(FuncAbsLongToLong.class, udaf.getInputExpression().getClass()); @@ -147,8 +147,8 @@ public void testValidateNestedExpressions() { andExprDesc.setChildren(children3); Vectorizer v = new Vectorizer(); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER)); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION)); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER)); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.PROJECTION)); } /** diff --git ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q index cef4e4c..fb0762b 100644 --- ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q +++ ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q @@ -32,7 +32,7 @@ insert into table count_case_groupby values ('key1', true),('key2', false),('key set hive.vectorized.adaptor.usage.mode=none; -explain +explain vectorization detail select c2 regexp 'val', c4 regexp 'val', @@ -45,7 +45,7 @@ select (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1; -explain +explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), @@ -58,7 +58,7 @@ select regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1; -explain +explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), @@ -74,7 +74,7 @@ from varchar_udf_1 limit 1; set hive.vectorized.adaptor.usage.mode=chosen; -explain +explain vectorization detail select c2 regexp 'val', c4 regexp 'val', @@ -87,7 +87,7 @@ select (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1; -explain +explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), @@ -100,7 +100,7 @@ select 
regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1; -explain +explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), @@ -116,11 +116,11 @@ from varchar_udf_1 limit 1; set hive.vectorized.adaptor.usage.mode=none; -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF; SELECT POWER(key, 2) FROM DECIMAL_UDF; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), @@ -135,11 +135,11 @@ FROM DECIMAL_UDF WHERE key = 10; set hive.vectorized.adaptor.usage.mode=chosen; -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF; SELECT POWER(key, 2) FROM DECIMAL_UDF; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), @@ -155,14 +155,14 @@ FROM DECIMAL_UDF WHERE key = 10; set hive.vectorized.adaptor.usage.mode=none; -explain +explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; set hive.vectorized.adaptor.usage.mode=chosen; -explain +explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; diff --git ql/src/test/queries/clientpositive/vector_aggregate_9.q ql/src/test/queries/clientpositive/vector_aggregate_9.q index ce6f0ff..7a8bfe6 100644 --- ql/src/test/queries/clientpositive/vector_aggregate_9.q +++ ql/src/test/queries/clientpositive/vector_aggregate_9.q @@ -38,7 +38,7 @@ STORED AS ORC; INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; -explain +explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q index 8a63635..ee8b6ea 100644 --- ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q +++ ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q @@ -12,5 +12,5 @@ values (7,20150404, '2015-04-04'); set hive.vectorized.execution.enabled=true; set hive.map.aggr=true; -explain select max(dt), max(greg_dt) from testvec where id=5; +explain vectorization select max(dt), max(greg_dt) from testvec where id=5; select max(dt), max(greg_dt) from testvec where id=5; diff --git ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q index 2077f8e..367bd7e 100644 --- ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q +++ ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q @@ -1,5 +1,5 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=true; +set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; ; @@ -23,7 +23,7 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.auto.convert.sortmerge.join=true; -- The join is being performed as part of sub-query. 
It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1; @@ -33,7 +33,7 @@ select count(*) from ( ) subq1; -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select key, count(*) from @@ -54,7 +54,7 @@ select count(*) from -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization detail select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -89,7 +89,7 @@ on src1.key = src2.key; -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -104,7 +104,7 @@ select count(*) from -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization detail select count(*) from ( select * from @@ -129,7 +129,7 @@ select count(*) from -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select * from @@ -169,7 +169,7 @@ select count(*) from -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization detail select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -184,7 +184,7 @@ select count(*) from -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side -- join should be performed -explain +explain vectorization detail select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -199,7 +199,7 @@ select count(*) from -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key; @@ -210,7 +210,7 @@ select count(*) from -- There are more than 2 inputs to the join, all of them being sub-queries. -- It should be converted to to a sort-merge join -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -231,7 +231,7 @@ select count(*) from -- The join is being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -261,7 +261,7 @@ CREATE TABLE dest2(key int, val1 string, val2 string); -- The join is followed by a multi-table insert. 
It should be converted to -- a sort-merge join -explain +explain vectorization detail from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -282,7 +282,7 @@ CREATE TABLE dest2(key int, cnt int); -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. -- It should be converted to a sort-merge join -explain +explain vectorization detail from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 diff --git ql/src/test/queries/clientpositive/vector_between_columns.q ql/src/test/queries/clientpositive/vector_between_columns.q index 41f9243..665338f 100644 --- ql/src/test/queries/clientpositive/vector_between_columns.q +++ ql/src/test/queries/clientpositive/vector_between_columns.q @@ -24,13 +24,13 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt; create table TINT stored as orc AS SELECT * FROM TINT_txt; -explain +explain vectorization select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint; select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint; -explain +explain vectorization select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; diff --git ql/src/test/queries/clientpositive/vector_between_in.q ql/src/test/queries/clientpositive/vector_between_in.q index 487bf96..8216bef 100644 --- ql/src/test/queries/clientpositive/vector_between_in.q +++ ql/src/test/queries/clientpositive/vector_between_in.q @@ -4,21 +4,21 @@ SET hive.vectorized.execution.enabled=true; CREATE TABLE decimal_date_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, CAST(CAST((CAST(cint AS BIGINT) *ctinyint) AS TIMESTAMP) AS DATE) AS cdate FROM alltypesorc ORDER BY cdate; -EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; +EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; -EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)); +EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)); -EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1; +EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1; -EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568); +EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568); -EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate; +EXPLAIN 
VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate; -EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate; +EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate; -EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1; +EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1; -EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351; +EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351; SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; @@ -40,13 +40,13 @@ SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 439 -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0; +EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0; -EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0; +EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0; -EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0; +EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0; -EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0; +EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0; SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0; diff --git ql/src/test/queries/clientpositive/vector_binary_join_groupby.q ql/src/test/queries/clientpositive/vector_binary_join_groupby.q index 1d99e34..a0f6e34 100644 --- ql/src/test/queries/clientpositive/vector_binary_join_groupby.q +++ ql/src/test/queries/clientpositive/vector_binary_join_groupby.q @@ -40,14 +40,14 @@ STORED AS ORC; INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100; -EXPLAIN +EXPLAIN VECTORIZATION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; -EXPLAIN +EXPLAIN VECTORIZATION SELECT count(*), bin FROM hundredorc GROUP BY bin; @@ -58,6 +58,6 @@ GROUP BY bin; -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). 
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_bround.q ql/src/test/queries/clientpositive/vector_bround.q index deea00b..2837e33 100644 --- ql/src/test/queries/clientpositive/vector_bround.q +++ ql/src/test/queries/clientpositive/vector_bround.q @@ -12,5 +12,5 @@ values (2.51, 1.251), (3.51, 1.351); set hive.vectorized.execution.enabled=true; -explain select bround(v0), bround(v1, 1) from test_vector_bround; +explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround; select bround(v0), bround(v1, 1) from test_vector_bround; diff --git ql/src/test/queries/clientpositive/vector_bucket.q ql/src/test/queries/clientpositive/vector_bucket.q index 39436c9..442919d 100644 --- ql/src/test/queries/clientpositive/vector_bucket.q +++ ql/src/test/queries/clientpositive/vector_bucket.q @@ -7,7 +7,7 @@ set hive.support.concurrency=true; CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS sequencefile; -explain +explain vectorization detail insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three'); select a, b from non_orc_table order by a; insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three'); select a, b from non_orc_table order by a; diff --git ql/src/test/queries/clientpositive/vector_cast_constant.q ql/src/test/queries/clientpositive/vector_cast_constant.q index 94bee09..7be95c0 100644 --- ql/src/test/queries/clientpositive/vector_cast_constant.q +++ ql/src/test/queries/clientpositive/vector_cast_constant.q @@ -38,7 +38,7 @@ STORED AS ORC; INSERT INTO TABLE over1korc SELECT * FROM over1k; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, diff --git ql/src/test/queries/clientpositive/vector_char_2.q ql/src/test/queries/clientpositive/vector_char_2.q index f1bb75b..d8c1f34 100644 --- ql/src/test/queries/clientpositive/vector_char_2.q +++ ql/src/test/queries/clientpositive/vector_char_2.q @@ -16,7 +16,7 @@ group by value order by value asc limit 5; -explain select value, sum(cast(key as int)), count(*) numrows +explain vectorization select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc @@ -35,7 +35,7 @@ group by value order by value desc limit 5; -explain select value, sum(cast(key as int)), count(*) numrows +explain vectorization select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q index 06f1d2b..bd8b42b 100644 --- ql/src/test/queries/clientpositive/vector_char_4.q +++ ql/src/test/queries/clientpositive/vector_char_4.q @@ -44,7 +44,7 @@ INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; drop table if exists char_lazy_binary_columnar; create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; -explain +explain vectorization detail insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; -- insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; diff --git 
ql/src/test/queries/clientpositive/vector_char_mapjoin1.q ql/src/test/queries/clientpositive/vector_char_mapjoin1.q index 58a73be..73ef5d4 100644 --- ql/src/test/queries/clientpositive/vector_char_mapjoin1.q +++ ql/src/test/queries/clientpositive/vector_char_mapjoin1.q @@ -37,21 +37,21 @@ create table char_join1_vc2_orc stored as orc as select * from char_join1_vc2; create table char_join1_str_orc stored as orc as select * from char_join1_str; -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1; +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1; -- SORT_QUERY_RESULTS select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1; -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1; +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1; -- SORT_QUERY_RESULTS select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1; -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1; +explain vectorization select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q index a921140..1f4c01e 100644 --- ql/src/test/queries/clientpositive/vector_char_simple.q +++ ql/src/test/queries/clientpositive/vector_char_simple.q @@ -14,7 +14,7 @@ from src order by key asc limit 5; -explain select key, value +explain vectorization select key, value from char_2 order by key asc limit 5; @@ -30,7 +30,7 @@ from src order by key desc limit 5; -explain select key, value +explain vectorization select key, value from char_2 order by key desc limit 5; @@ -49,7 +49,7 @@ create table char_3 ( field char(12) ) stored as orc; -explain +explain vectorization detail insert into table char_3 select cint from alltypesorc limit 10; insert into table char_3 select cint from alltypesorc limit 10; diff --git ql/src/test/queries/clientpositive/vector_coalesce.q ql/src/test/queries/clientpositive/vector_coalesce.q index cfba7be..ec5f1b5 100644 --- ql/src/test/queries/clientpositive/vector_coalesce.q +++ ql/src/test/queries/clientpositive/vector_coalesce.q @@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true; -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c @@ -16,7 +16,7 @@ WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10; -EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +EXPLAIN VECTORIZATION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c @@ -28,7 +28,7 @@ WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10; -EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +EXPLAIN VECTORIZATION SELECT cfloat, 
cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c @@ -40,7 +40,7 @@ WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10; -EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +EXPLAIN VECTORIZATION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c @@ -52,7 +52,7 @@ WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10; -EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c @@ -64,7 +64,7 @@ WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10; -EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +EXPLAIN VECTORIZATION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_coalesce_2.q ql/src/test/queries/clientpositive/vector_coalesce_2.q index c847e20..81f92b6 100644 --- ql/src/test/queries/clientpositive/vector_coalesce_2.q +++ ql/src/test/queries/clientpositive/vector_coalesce_2.q @@ -7,7 +7,7 @@ create table str_str_orc (str1 string, str2 string) stored as orc; insert into table str_str_orc values (null, "X"), ("0", "X"), ("1", "X"), (null, "y"); -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc @@ -18,7 +18,7 @@ SELECT from str_str_orc GROUP BY str2; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc; @@ -27,7 +27,7 @@ from str_str_orc; SET hive.vectorized.execution.enabled=true; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc @@ -38,7 +38,7 @@ SELECT from str_str_orc GROUP BY str2; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc; diff --git ql/src/test/queries/clientpositive/vector_complex_all.q ql/src/test/queries/clientpositive/vector_complex_all.q index 91a7368..2dcfe86 100644 --- ql/src/test/queries/clientpositive/vector_complex_all.q +++ ql/src/test/queries/clientpositive/vector_complex_all.q @@ -27,19 +27,19 @@ CREATE TABLE orc_create_complex ( INSERT OVERWRITE TABLE orc_create_complex SELECT * FROM orc_create_staging; -- Since complex types are not supported, this query should not vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM orc_create_complex; SELECT * FROM orc_create_complex; -- However, since this query is not referencing the complex fields, it should vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COUNT(*) FROM orc_create_complex; SELECT COUNT(*) FROM orc_create_complex; -- Also, since this query is not referencing the complex fields, it should vectorize. 
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str FROM orc_create_complex ORDER BY str; SELECT str FROM orc_create_complex ORDER BY str; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_complex_join.q ql/src/test/queries/clientpositive/vector_complex_join.q index 30f38b1..b075955 100644 --- ql/src/test/queries/clientpositive/vector_complex_join.q +++ ql/src/test/queries/clientpositive/vector_complex_join.q @@ -10,7 +10,7 @@ set hive.fetch.task.conversion=none; CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC; INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1; -explain +explain vectorization detail select * from alltypesorc join test where alltypesorc.cint=test.a; select * from alltypesorc join test where alltypesorc.cint=test.a; @@ -23,7 +23,7 @@ INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1; CREATE TABLE test2b (a INT) STORED AS ORC; INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4); -explain +explain vectorization detail select * from test2b join test2a on test2b.a = test2a.a[1]; select * from test2b join test2a on test2b.a = test2a.a[1]; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_const.q ql/src/test/queries/clientpositive/vector_const.q index 1c7d35c..01983db 100644 --- ql/src/test/queries/clientpositive/vector_const.q +++ ql/src/test/queries/clientpositive/vector_const.q @@ -6,7 +6,7 @@ CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC; INSERT INTO varchar_const_1 values(42); -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1; SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1; diff --git ql/src/test/queries/clientpositive/vector_count.q ql/src/test/queries/clientpositive/vector_count.q index 341db74..d73f0d8 100644 --- ql/src/test/queries/clientpositive/vector_count.q +++ ql/src/test/queries/clientpositive/vector_count.q @@ -12,15 +12,15 @@ create table abcd stored as orc as select * from abcd_txt; select * from abcd; set hive.map.aggr=true; -explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; +explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; -explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; +explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d),
count(distinct a,b,d), count(distinct a,b,c,d) from abcd; set hive.map.aggr=false; -explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; +explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; -explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; +explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; diff --git ql/src/test/queries/clientpositive/vector_count_distinct.q ql/src/test/queries/clientpositive/vector_count_distinct.q index ec72079..11e6d09 100644 --- ql/src/test/queries/clientpositive/vector_count_distinct.q +++ ql/src/test/queries/clientpositive/vector_count_distinct.q @@ -104,7 +104,7 @@ select ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk, ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales; select count(distinct ws_order_number) from web_sales; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_custom_udf_configure.q ql/src/test/queries/clientpositive/vector_custom_udf_configure.q index eb19f3a..0c212af 100644 --- ql/src/test/queries/clientpositive/vector_custom_udf_configure.q +++ ql/src/test/queries/clientpositive/vector_custom_udf_configure.q @@ -6,6 +6,6 @@ create table testorc1(id int, name string) stored as orc; insert into table testorc1 values(1, 'a1'), (2,'a2'); set hive.vectorized.execution.enabled=true; -explain +explain vectorization detail select id, UDFHelloTest(name) from testorc1; select id, UDFHelloTest(name) from testorc1; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_data_types.q ql/src/test/queries/clientpositive/vector_data_types.q index c7e0d1b..af0affb 100644 --- ql/src/test/queries/clientpositive/vector_data_types.q +++ ql/src/test/queries/clientpositive/vector_data_types.q @@ -38,7 +38,7 @@ INSERT INTO TABLE over1korc SELECT * FROM over1k; SET hive.vectorized.execution.enabled=false; -EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; +EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; @@ -47,7 +47,7 @@ FROM 
(SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, s SET hive.vectorized.execution.enabled=true; -EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; +EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20; diff --git ql/src/test/queries/clientpositive/vector_date_1.q ql/src/test/queries/clientpositive/vector_date_1.q index 072ed5c..dd5324b 100644 --- ql/src/test/queries/clientpositive/vector_date_1.q +++ ql/src/test/queries/clientpositive/vector_date_1.q @@ -15,7 +15,7 @@ insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1; -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -42,7 +42,7 @@ select dt2 > dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, dt2, -- should be all false @@ -70,7 +70,7 @@ select from vector_date_1 order by dt1; -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -97,7 +97,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, -- should all be false @@ -127,7 +127,7 @@ from vector_date_1 order by dt1; -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -154,7 +154,7 @@ order by dt1; -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 diff --git ql/src/test/queries/clientpositive/vector_decimal_1.q ql/src/test/queries/clientpositive/vector_decimal_1.q index 8a1503f..f41bd64 100644 --- ql/src/test/queries/clientpositive/vector_decimal_1.q +++ ql/src/test/queries/clientpositive/vector_decimal_1.q @@ -12,47 +12,47 @@ desc decimal_1; insert overwrite table decimal_1 select cast('17.29' as decimal(4,2)), 3.1415926BD, 3115926.54321BD from src tablesample (1 rows); -explain +explain vectorization detail select cast(t as boolean) from decimal_1 order by t; select cast(t as boolean) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as tinyint) from decimal_1 order by t; select cast(t as tinyint) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as smallint) from decimal_1 order by t; select cast(t as smallint) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as int) from decimal_1 order by t; select cast(t as int) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as bigint) from decimal_1 order by t; select cast(t as bigint) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as float) from decimal_1 order by t; select cast(t as float) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as double) from decimal_1 order by t; select cast(t as double) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as string) from decimal_1 order by t; select cast(t as string) from decimal_1 order by t; -explain +explain vectorization detail select cast(t as timestamp) from decimal_1 order by t; select cast(t as timestamp) from 
decimal_1 order by t; diff --git ql/src/test/queries/clientpositive/vector_decimal_10_0.q ql/src/test/queries/clientpositive/vector_decimal_10_0.q index 596b2bd..180302f 100644 --- ql/src/test/queries/clientpositive/vector_decimal_10_0.q +++ ql/src/test/queries/clientpositive/vector_decimal_10_0.q @@ -12,7 +12,7 @@ LOAD DATA LOCAL INPATH '../../data/files/decimal_10_0.txt' OVERWRITE INTO TABLE CREATE TABLE `DECIMAL` STORED AS ORC AS SELECT * FROM decimal_txt; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT dec FROM `DECIMAL` order by dec; SELECT dec FROM `DECIMAL` order by dec; diff --git ql/src/test/queries/clientpositive/vector_decimal_2.q ql/src/test/queries/clientpositive/vector_decimal_2.q index f1477ce..50eef3c 100644 --- ql/src/test/queries/clientpositive/vector_decimal_2.q +++ ql/src/test/queries/clientpositive/vector_decimal_2.q @@ -10,42 +10,42 @@ create table decimal_2 (t decimal(18,9)) stored as orc; insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from src tablesample (1 rows); -explain +explain vectorization detail select cast(t as boolean) from decimal_2 order by t; select cast(t as boolean) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as tinyint) from decimal_2 order by t; select cast(t as tinyint) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as smallint) from decimal_2 order by t; select cast(t as smallint) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as int) from decimal_2 order by t; select cast(t as int) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as bigint) from decimal_2 order by t; select cast(t as bigint) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as float) from decimal_2 order by t; select cast(t as float) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as double) from decimal_2 order by t; select cast(t as double) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as string) from decimal_2 order by t; select cast(t as string) from decimal_2 order by t; @@ -53,95 +53,95 @@ select cast(t as string) from decimal_2 order by t; insert overwrite table decimal_2 select cast('3404045.5044003' as decimal(18,9)) from src tablesample (1 rows); -explain +explain vectorization detail select cast(t as boolean) from decimal_2 order by t; select cast(t as boolean) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as tinyint) from decimal_2 order by t; select cast(t as tinyint) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as smallint) from decimal_2 order by t; select cast(t as smallint) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as int) from decimal_2 order by t; select cast(t as int) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as bigint) from decimal_2 order by t; select cast(t as bigint) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as float) from decimal_2 order by t; select cast(t as float) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as double) from decimal_2 order by t; select cast(t as double) from decimal_2 order by t; -explain +explain vectorization detail select cast(t as string) from decimal_2 order by t; select cast(t as string) from decimal_2 order by t; -explain +explain vectorization detail select cast(3.14 
as decimal(4,2)) as c from decimal_2 order by c; select cast(3.14 as decimal(4,2)) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(cast(3.14 as float) as decimal(4,2)) as c from decimal_2 order by c; select cast(cast(3.14 as float) as decimal(4,2)) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(cast('2012-12-19 11:12:19.1234567' as timestamp) as decimal(30,8)) as c from decimal_2 order by c; select cast(cast('2012-12-19 11:12:19.1234567' as timestamp) as decimal(30,8)) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(true as decimal) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(true as decimal) as c from decimal_2 order by c; select cast(true as decimal) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(3Y as decimal) as c from decimal_2 order by c; select cast(3Y as decimal) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(3S as decimal) as c from decimal_2 order by c; select cast(3S as decimal) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(cast(3 as int) as decimal) as c from decimal_2 order by c; select cast(cast(3 as int) as decimal) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(3L as decimal) as c from decimal_2 order by c; select cast(3L as decimal) as c from decimal_2 order by c; -explain +explain vectorization detail select cast(0.99999999999999999999 as decimal(20,19)) as c from decimal_2 order by c; select cast(0.99999999999999999999 as decimal(20,19)) as c from decimal_2 order by c; -explain +explain vectorization detail select cast('0.99999999999999999999' as decimal(20,20)) as c from decimal_2 order by c; select cast('0.99999999999999999999' as decimal(20,20)) as c from decimal_2 order by c; diff --git ql/src/test/queries/clientpositive/vector_decimal_aggregate.q ql/src/test/queries/clientpositive/vector_decimal_aggregate.q index 552a564..eaa4b20 100644 --- ql/src/test/queries/clientpositive/vector_decimal_aggregate.q +++ ql/src/test/queries/clientpositive/vector_decimal_aggregate.q @@ -10,7 +10,7 @@ SET hive.vectorized.execution.enabled=true; -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby @@ -24,7 +24,7 @@ SELECT cint, HAVING COUNT(*) > 1; -- Now add the others... 
-EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby diff --git ql/src/test/queries/clientpositive/vector_decimal_cast.q ql/src/test/queries/clientpositive/vector_decimal_cast.q index eb0e75c..5f6b9e3 100644 --- ql/src/test/queries/clientpositive/vector_decimal_cast.q +++ ql/src/test/queries/clientpositive/vector_decimal_cast.q @@ -1,6 +1,6 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10; +EXPLAIN VECTORIZATION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10; SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_decimal_expressions.q ql/src/test/queries/clientpositive/vector_decimal_expressions.q index 33d0747..cced847 100644 --- ql/src/test/queries/clientpositive/vector_decimal_expressions.q +++ ql/src/test/queries/clientpositive/vector_decimal_expressions.q @@ -5,7 +5,7 @@ set hive.explain.user=false; CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc; SET hive.vectorized.execution.enabled=true; -EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +EXPLAIN VECTORIZATION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10; diff --git 
ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q index 3007239..f1fc9d8 100644 --- ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q @@ -26,7 +26,7 @@ INSERT INTO TABLE t1 select dec from over1k; CREATE TABLE t2(dec decimal(4,0)) STORED AS ORC; INSERT INTO TABLE t2 select dec from over1k; -explain +explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec); -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q index 4ebde6a..4b13088 100644 --- ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q +++ ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q @@ -5,7 +5,7 @@ SET hive.vectorized.execution.enabled=true; -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdecimal1 ,Round(cdecimal1, 2) diff --git ql/src/test/queries/clientpositive/vector_decimal_precision.q ql/src/test/queries/clientpositive/vector_decimal_precision.q index cc3fb63..c1e4328 100644 --- ql/src/test/queries/clientpositive/vector_decimal_precision.q +++ ql/src/test/queries/clientpositive/vector_decimal_precision.q @@ -26,7 +26,7 @@ SELECT dec, dec / 9 FROM DECIMAL_PRECISION ORDER BY dec; SELECT dec, dec / 27 FROM DECIMAL_PRECISION ORDER BY dec; SELECT dec, dec * dec FROM DECIMAL_PRECISION ORDER BY dec; -EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION; +EXPLAIN VECTORIZATION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION; SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION; SELECT dec * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1; diff --git ql/src/test/queries/clientpositive/vector_decimal_round.q ql/src/test/queries/clientpositive/vector_decimal_round.q index bf83163..da44e4b 100644 --- ql/src/test/queries/clientpositive/vector_decimal_round.q +++ ql/src/test/queries/clientpositive/vector_decimal_round.q @@ -12,12 +12,12 @@ insert into table decimal_tbl_txt values(101); select * from decimal_tbl_txt; -explain +explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by dec; select dec, round(dec, -1) from decimal_tbl_txt order by dec; -explain +explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1); select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1); @@ -29,12 +29,12 @@ insert into table decimal_tbl_rc values(101); select * from decimal_tbl_rc; -explain +explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by dec; select dec, round(dec, -1) from decimal_tbl_rc order by dec; -explain +explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1); select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1); @@ -46,12 +46,12 @@ insert into table decimal_tbl_orc values(101); select * from decimal_tbl_orc; -explain +explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by dec; select dec, round(dec, -1) from decimal_tbl_orc order by dec; -explain +explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1); select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_decimal_round_2.q 
ql/src/test/queries/clientpositive/vector_decimal_round_2.q index 0020325..78953be 100644 --- ql/src/test/queries/clientpositive/vector_decimal_round_2.q +++ ql/src/test/queries/clientpositive/vector_decimal_round_2.q @@ -19,7 +19,7 @@ select * from decimal_tbl_1_orc; -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), @@ -39,7 +39,7 @@ insert into table decimal_tbl_2_orc values(125.315, -125.315); select * from decimal_tbl_2_orc; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -65,7 +65,7 @@ insert into table decimal_tbl_3_orc values(3.141592653589793); select * from decimal_tbl_3_orc; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -113,7 +113,7 @@ insert into table decimal_tbl_4_orc values(1809242.3151111344, -1809242.31511113 select * from decimal_tbl_4_orc; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p; diff --git ql/src/test/queries/clientpositive/vector_decimal_udf.q ql/src/test/queries/clientpositive/vector_decimal_udf.q index 416d348..38798d9 100644 --- ql/src/test/queries/clientpositive/vector_decimal_udf.q +++ ql/src/test/queries/clientpositive/vector_decimal_udf.q @@ -19,80 +19,80 @@ STORED AS ORC; INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt; -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key + key FROM DECIMAL_UDF; SELECT key + key FROM DECIMAL_UDF; -EXPLAIN SELECT key + value FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key + value FROM DECIMAL_UDF; SELECT key + value FROM DECIMAL_UDF; -EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key + (value/2) FROM DECIMAL_UDF; SELECT key + (value/2) FROM DECIMAL_UDF; -EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key + '1.0' FROM DECIMAL_UDF; SELECT key + '1.0' FROM DECIMAL_UDF; -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key - key FROM DECIMAL_UDF; SELECT key - key FROM DECIMAL_UDF; -EXPLAIN SELECT key - value FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key - value FROM DECIMAL_UDF; SELECT key - value FROM DECIMAL_UDF; -EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key - (value/2) FROM DECIMAL_UDF; SELECT key - (value/2) FROM DECIMAL_UDF; -EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key - '1.0' FROM DECIMAL_UDF; SELECT key - '1.0' FROM DECIMAL_UDF; -- multiplication -EXPLAIN SELECT key * key FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key * key FROM DECIMAL_UDF; SELECT key * key FROM DECIMAL_UDF; -EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0; +EXPLAIN VECTORIZATION SELECT key, value FROM DECIMAL_UDF where key * value > 0; SELECT key, value FROM DECIMAL_UDF where key * value > 0; -EXPLAIN SELECT key * value FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key * value FROM DECIMAL_UDF; SELECT key * value FROM DECIMAL_UDF; -EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key * (value/2) FROM DECIMAL_UDF; SELECT key * (value/2) FROM 
DECIMAL_UDF; -EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT key * '2.0' FROM DECIMAL_UDF; SELECT key * '2.0' FROM DECIMAL_UDF; -- division -EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1; +EXPLAIN VECTORIZATION SELECT key / 0 FROM DECIMAL_UDF limit 1; SELECT key / 0 FROM DECIMAL_UDF limit 1; -EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1; +EXPLAIN VECTORIZATION SELECT key / NULL FROM DECIMAL_UDF limit 1; SELECT key / NULL FROM DECIMAL_UDF limit 1; -EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0; +EXPLAIN VECTORIZATION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0; SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0; -EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0; +EXPLAIN VECTORIZATION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0; SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0; -EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0; +EXPLAIN VECTORIZATION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0; SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0; -EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF; SELECT 1 + (key / '2.0') FROM DECIMAL_UDF; -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT abs(key) FROM DECIMAL_UDF; SELECT abs(key) FROM DECIMAL_UDF; -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value; +EXPLAIN VECTORIZATION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value; SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value; -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT -key FROM DECIMAL_UDF; SELECT -key FROM DECIMAL_UDF; -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT +key FROM DECIMAL_UDF; SELECT +key FROM DECIMAL_UDF; -- ceiling @@ -100,43 +100,43 @@ EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF; SELECT CEIL(key) FROM DECIMAL_UDF; -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT FLOOR(key) FROM DECIMAL_UDF; SELECT FLOOR(key) FROM DECIMAL_UDF; -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT ROUND(key, 2) FROM DECIMAL_UDF; SELECT ROUND(key, 2) FROM DECIMAL_UDF; -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF; SELECT POWER(key, 2) FROM DECIMAL_UDF; -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF; SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF; -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value; +EXPLAIN VECTORIZATION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value; SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value; -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value; +EXPLAIN VECTORIZATION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value; SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value; -- histogram -EXPLAIN SELECT 
histogram_numeric(key, 3) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF; SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF; -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT MIN(key) FROM DECIMAL_UDF; SELECT MIN(key) FROM DECIMAL_UDF; -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT MAX(key) FROM DECIMAL_UDF; SELECT MAX(key) FROM DECIMAL_UDF; -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF; +EXPLAIN VECTORIZATION SELECT COUNT(key) FROM DECIMAL_UDF; SELECT COUNT(key) FROM DECIMAL_UDF; DROP TABLE IF EXISTS DECIMAL_UDF_txt; diff --git ql/src/test/queries/clientpositive/vector_decimal_udf2.q ql/src/test/queries/clientpositive/vector_decimal_udf2.q index 433f464..f77c440a 100644 --- ql/src/test/queries/clientpositive/vector_decimal_udf2.q +++ ql/src/test/queries/clientpositive/vector_decimal_udf2.q @@ -18,14 +18,14 @@ STORED AS ORC; INSERT OVERWRITE TABLE DECIMAL_UDF2 SELECT * FROM DECIMAL_UDF2_txt; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10; SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), diff --git ql/src/test/queries/clientpositive/vector_distinct_2.q ql/src/test/queries/clientpositive/vector_distinct_2.q index 509b262..cd0b962 100644 --- ql/src/test/queries/clientpositive/vector_distinct_2.q +++ ql/src/test/queries/clientpositive/vector_distinct_2.q @@ -41,7 +41,7 @@ STORED AS ORC; INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; -explain +explain vectorization detail select distinct s, t from vectortab2korc; select distinct s, t from vectortab2korc; diff --git ql/src/test/queries/clientpositive/vector_elt.q ql/src/test/queries/clientpositive/vector_elt.q index f44a3be..ec06865 100644 --- ql/src/test/queries/clientpositive/vector_elt.q +++ ql/src/test/queries/clientpositive/vector_elt.q @@ -2,7 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; -EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +EXPLAIN VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10; @@ -10,7 +10,7 @@ SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cin FROM alltypesorc WHERE ctinyint > 0 LIMIT 10; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), diff --git ql/src/test/queries/clientpositive/vector_empty_where.q ql/src/test/queries/clientpositive/vector_empty_where.q index 0543a65..e828eb8 100644 --- ql/src/test/queries/clientpositive/vector_empty_where.q +++ ql/src/test/queries/clientpositive/vector_empty_where.q @@ -2,22 +2,22 @@ SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; -- HIVE- -explain +explain vectorization detail select count (distinct cint) from alltypesorc where cstring1; select count (distinct cint) from alltypesorc where cstring1; -explain +explain vectorization detail select count (distinct cint) from alltypesorc where cint; select count (distinct cint) from alltypesorc where cint; -explain +explain vectorization detail select count 
(distinct cint) from alltypesorc where cfloat; select count (distinct cint) from alltypesorc where cfloat; -explain +explain vectorization detail select count (distinct cint) from alltypesorc where ctimestamp1; select count (distinct cint) from alltypesorc where ctimestamp1; diff --git ql/src/test/queries/clientpositive/vector_groupby4.q ql/src/test/queries/clientpositive/vector_groupby4.q index a59d1a8..717ff0c 100644 --- ql/src/test/queries/clientpositive/vector_groupby4.q +++ ql/src/test/queries/clientpositive/vector_groupby4.q @@ -12,7 +12,7 @@ CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src; CREATE TABLE dest1(c1 STRING) STORED AS ORC; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1); diff --git ql/src/test/queries/clientpositive/vector_groupby6.q ql/src/test/queries/clientpositive/vector_groupby6.q index 89c7a19..68591de 100644 --- ql/src/test/queries/clientpositive/vector_groupby6.q +++ ql/src/test/queries/clientpositive/vector_groupby6.q @@ -12,7 +12,7 @@ CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src; CREATE TABLE dest1(c1 STRING) STORED AS ORC; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1); diff --git ql/src/test/queries/clientpositive/vector_groupby_3.q ql/src/test/queries/clientpositive/vector_groupby_3.q index d42d7f1..2d81fc7 100644 --- ql/src/test/queries/clientpositive/vector_groupby_3.q +++ ql/src/test/queries/clientpositive/vector_groupby_3.q @@ -41,7 +41,7 @@ STORED AS ORC; INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; -explain +explain vectorization detail select s, t, max(b) from vectortab2korc group by s, t; select s, t, max(b) from vectortab2korc group by s, t; diff --git ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q index 53df2aa..52b0d11 100644 --- ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q @@ -8,7 +8,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; set hive.exec.dynamic.partition.mode=nonstrict; -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. 
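Taken together, the hunks above swap plain EXPLAIN for the new EXPLAIN VECTORIZATION statement at several scopes. A minimal sketch of the variants as they can be inferred from the q-file changes in this patch follows; the per-line comments are interpretive, and treating SUMMARY as the default level when no level is given is an assumption, not something these hunks state.

set hive.vectorized.execution.enabled=true;

-- bare form, as in the vector_decimal_udf.q hunks above; summary-level output is assumed
EXPLAIN VECTORIZATION SELECT key FROM DECIMAL_UDF;

-- explicit summary scope, as in the vector_inner_join.q hunks above
EXPLAIN VECTORIZATION SUMMARY SELECT key FROM DECIMAL_UDF;

-- per-operator and per-expression detail, as in vector_decimal_udf2.q
EXPLAIN VECTORIZATION DETAIL SELECT key FROM DECIMAL_UDF;

-- ONLY, used in the vector_leftsemi_mapjoin.q hunks below, is assumed to restrict
-- the output to the vectorization annotations themselves
EXPLAIN VECTORIZATION ONLY SUMMARY SELECT key FROM DECIMAL_UDF;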
-explain +explain vectorization detail select * from src where not key in diff --git ql/src/test/queries/clientpositive/vector_groupby_reduce.q ql/src/test/queries/clientpositive/vector_groupby_reduce.q index e78b57f..6dddb33 100644 --- ql/src/test/queries/clientpositive/vector_groupby_reduce.q +++ ql/src/test/queries/clientpositive/vector_groupby_reduce.q @@ -91,7 +91,7 @@ ss_sold_date_sk , ss_net_profit from store_sales_txt; -explain +explain vectorization detail select ss_ticket_number from @@ -108,7 +108,7 @@ limit 20; -explain +explain vectorization detail select min(ss_ticket_number) m from @@ -133,7 +133,7 @@ order by m; -explain +explain vectorization detail select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -157,7 +157,7 @@ group by ss_ticket_number order by ss_ticket_number; -explain +explain vectorization detail select ss_ticket_number, ss_item_sk, sum(q) from diff --git ql/src/test/queries/clientpositive/vector_grouping_sets.q ql/src/test/queries/clientpositive/vector_grouping_sets.q index 09ba6b6..32a906d 100644 --- ql/src/test/queries/clientpositive/vector_grouping_sets.q +++ ql/src/test/queries/clientpositive/vector_grouping_sets.q @@ -45,7 +45,7 @@ create table store stored as orc as select * from store_txt; -explain +explain vectorization detail select s_store_id from store group by s_store_id with rollup; @@ -54,7 +54,7 @@ select s_store_id from store group by s_store_id with rollup; -explain +explain vectorization detail select s_store_id, GROUPING__ID from store group by s_store_id with rollup; diff --git ql/src/test/queries/clientpositive/vector_if_expr.q ql/src/test/queries/clientpositive/vector_if_expr.q index 475cecf..fc54297 100644 --- ql/src/test/queries/clientpositive/vector_if_expr.q +++ ql/src/test/queries/clientpositive/vector_if_expr.q @@ -3,7 +3,7 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=minimal; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1; SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 LIMIT 5; diff --git ql/src/test/queries/clientpositive/vector_include_no_sel.q ql/src/test/queries/clientpositive/vector_include_no_sel.q index 03f676b..251ddb0 100644 --- ql/src/test/queries/clientpositive/vector_include_no_sel.q +++ ql/src/test/queries/clientpositive/vector_include_no_sel.q @@ -69,7 +69,7 @@ LOAD DATA LOCAL INPATH '../../data/files/customer_demographics.txt' OVERWRITE IN create table customer_demographics stored as orc as select * from customer_demographics_txt; -explain +explain vectorization detail select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')); diff --git ql/src/test/queries/clientpositive/vector_inner_join.q ql/src/test/queries/clientpositive/vector_inner_join.q index 24b66bf..184a110 100644 --- ql/src/test/queries/clientpositive/vector_inner_join.q +++ ql/src/test/queries/clientpositive/vector_inner_join.q @@ -9,12 +9,12 @@ CREATE TABLE orc_table_2a(c INT) STORED AS ORC; insert into table orc_table_1a values(1),(1), (2),(3); insert into table orc_table_2a values(0),(2), (3),(null),(4); -explain +explain vectorization summary select t1.a from 
orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2; select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2; select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2; @@ -26,38 +26,38 @@ CREATE TABLE orc_table_2b(c INT, v2 STRING) STORED AS ORC; insert into table orc_table_1b values("one", 1),("one", 1), ("two", 2),("three", 3); insert into table orc_table_2b values(0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, ""),(4, "FOUR"); -explain +explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2; select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2; -explain +explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2; select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2; diff --git ql/src/test/queries/clientpositive/vector_interval_1.q ql/src/test/queries/clientpositive/vector_interval_1.q index 8fefe41..f494587 100644 --- ql/src/test/queries/clientpositive/vector_interval_1.q +++ ql/src/test/queries/clientpositive/vector_interval_1.q @@ -13,7 +13,7 @@ insert into vector_interval_1 select null, null, null, null from src limit 1; -- constants/cast from string -explain +explain vectorization detail select str1, interval '1-2' year to month, interval_year_month(str1), @@ -28,7 +28,7 @@ from vector_interval_1 order by str1; -- interval arithmetic -explain +explain vectorization detail select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -49,7 +49,7 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt; -explain +explain vectorization detail select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -72,7 +72,7 @@ from vector_interval_1 order by dt; -- date-interval arithmetic -explain +explain vectorization detail select dt, dt + interval '1-2' year to month, @@ -107,7 +107,7 @@ from vector_interval_1 order by dt; -- timestamp-interval arithmetic -explain +explain vectorization detail 
select ts, ts + interval '1-2' year to month, @@ -142,7 +142,7 @@ from vector_interval_1 order by ts; -- timestamp-timestamp arithmetic -explain +explain vectorization detail select ts, ts - ts, @@ -159,7 +159,7 @@ from vector_interval_1 order by ts; -- date-date arithmetic -explain +explain vectorization detail select dt, dt - dt, @@ -176,7 +176,7 @@ from vector_interval_1 order by dt; -- date-timestamp arithmetic -explain +explain vectorization detail select dt, ts - dt, diff --git ql/src/test/queries/clientpositive/vector_interval_2.q ql/src/test/queries/clientpositive/vector_interval_2.q index 5afb511..bbb8d27 100644 --- ql/src/test/queries/clientpositive/vector_interval_2.q +++ ql/src/test/queries/clientpositive/vector_interval_2.q @@ -14,7 +14,7 @@ insert into vector_interval_2 -- interval comparisons in select clause -explain +explain vectorization detail select str1, -- Should all be true @@ -77,7 +77,7 @@ select interval '1-2' year to month != interval_year_month(str2) from vector_interval_2 order by str1; -explain +explain vectorization detail select str1, -- Should all be false @@ -128,7 +128,7 @@ select interval '1-2' year to month != interval_year_month(str1) from vector_interval_2 order by str1; -explain +explain vectorization detail select str3, -- Should all be true @@ -191,7 +191,7 @@ select interval '1 2:3:4' day to second != interval_day_time(str4) from vector_interval_2 order by str3; -explain +explain vectorization detail select str3, -- Should all be false @@ -244,7 +244,7 @@ from vector_interval_2 order by str3; -- interval expressions in predicates -explain +explain vectorization detail select ts from vector_interval_2 where interval_year_month(str1) = interval_year_month(str1) @@ -293,7 +293,7 @@ where and interval '1-3' year to month > interval_year_month(str1) order by ts; -explain +explain vectorization detail select ts from vector_interval_2 where interval_day_time(str3) = interval_day_time(str3) @@ -342,7 +342,7 @@ where and interval '1 2:3:5' day to second > interval_day_time(str3) order by ts; -explain +explain vectorization detail select ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -381,7 +381,7 @@ where and dt != dt + interval '1-2' year to month order by ts; -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -431,7 +431,7 @@ where order by ts; -- day to second expressions in predicate -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -480,7 +480,7 @@ where and ts > dt - interval '0 1:2:4' day to second order by ts; -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day diff --git ql/src/test/queries/clientpositive/vector_interval_arithmetic.q ql/src/test/queries/clientpositive/vector_interval_arithmetic.q index 40c4c03..a9290a4 100644 --- ql/src/test/queries/clientpositive/vector_interval_arithmetic.q +++ ql/src/test/queries/clientpositive/vector_interval_arithmetic.q @@ -13,7 +13,7 @@ insert overwrite table interval_arithmetic_1 SET hive.vectorized.execution.enabled=true; -- interval year-month arithmetic -explain +explain vectorization detail select dateval, dateval - interval '2-2' year to month, @@ -36,7 +36,7 @@ select from interval_arithmetic_1 order by dateval; -explain +explain vectorization detail select dateval, dateval - date 
'1999-06-07', @@ -53,7 +53,7 @@ select from interval_arithmetic_1 order by dateval; -explain +explain vectorization detail select tsval, tsval - interval '2-2' year to month, @@ -76,7 +76,7 @@ select from interval_arithmetic_1 order by tsval; -explain +explain vectorization detail select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -93,7 +93,7 @@ limit 2; -- interval day-time arithmetic -explain +explain vectorization detail select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -116,7 +116,7 @@ select from interval_arithmetic_1 order by dateval; -explain +explain vectorization detail select dateval, tsval, @@ -135,7 +135,7 @@ select from interval_arithmetic_1 order by dateval; -explain +explain vectorization detail select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -158,7 +158,7 @@ select from interval_arithmetic_1 order by tsval; -explain +explain vectorization detail select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second diff --git ql/src/test/queries/clientpositive/vector_interval_mapjoin.q ql/src/test/queries/clientpositive/vector_interval_mapjoin.q index 36ccd35..76b1ba5 100644 --- ql/src/test/queries/clientpositive/vector_interval_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_interval_mapjoin.q @@ -45,7 +45,7 @@ LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE ve CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k; -explain +explain vectorization detail select v1.s, v2.s, diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q index 1467cd3..877800d 100644 --- ql/src/test/queries/clientpositive/vector_join30.q +++ ql/src/test/queries/clientpositive/vector_join30.q @@ -10,7 +10,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src; -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -25,7 +25,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -40,7 +40,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -55,7 +55,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -76,7 +76,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -97,7 +97,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -118,7 +118,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -139,7 +139,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain +explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN diff --git 
ql/src/test/queries/clientpositive/vector_join_part_col_char.q ql/src/test/queries/clientpositive/vector_join_part_col_char.q index 5cfce37..710f872 100644 --- ql/src/test/queries/clientpositive/vector_join_part_col_char.q +++ ql/src/test/queries/clientpositive/vector_join_part_col_char.q @@ -22,7 +22,7 @@ insert into table char_tbl2 partition(gpa='3') select name, age from studenttab show partitions char_tbl1; show partitions char_tbl2; -explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa); +explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa); select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa); set hive.vectorized.execution.enabled=false; diff --git ql/src/test/queries/clientpositive/vector_left_outer_join.q ql/src/test/queries/clientpositive/vector_left_outer_join.q index ddf2660..04fee2c 100644 --- ql/src/test/queries/clientpositive/vector_left_outer_join.q +++ ql/src/test/queries/clientpositive/vector_left_outer_join.q @@ -3,7 +3,7 @@ set hive.explain.user=false; set hive.vectorized.execution.enabled=true; set hive.auto.convert.join=true; set hive.mapjoin.hybridgrace.hashtable=false; -explain +explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q index 5da5d50..a153695 100644 --- ql/src/test/queries/clientpositive/vector_left_outer_join2.q +++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q @@ -20,14 +20,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -36,7 +36,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -44,7 +44,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 
as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -52,7 +52,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -60,7 +60,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q index dfb8405..de037b1 100644 --- ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q @@ -26,381 +26,502 @@ select * from t4; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization only summary + +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization only summary +select a.value from t1 
a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join 
t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from 
t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization summary +select a.key from t1 a full outer join t3 b on 
a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < 
"val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; 
-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; 
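Note the q-file convention these hunks preserve: every explain vectorization statement is immediately followed by the identical query, so the recorded .q.out captures both the plan annotations and the result rows. For example, from the block above:

explain vectorization only detail
select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;

-- the same query again, run for its results
select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;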
-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; +explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; +explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; +explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; +explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; +explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; +explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization only detail +select a.key 
from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; +explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; +explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; +explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; +explain 
vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization only detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 set hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
-explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
 select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
-explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
 select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
-explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
 select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
-explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
 select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
-explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
 select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
-explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
 select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
-explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
 select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
-explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
 select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
-explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+explain vectorization only detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
 select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
-explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
 select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
-explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+explain vectorization only detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
 select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
-explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
 select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
-explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
 select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
-explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
 select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
 select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
 select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
 select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
-explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
+explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
 select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100;
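The left-semi-join suite above is the heaviest user of the new syntax, so it is worth spelling the grammar out once. A usage sketch, assuming the EXPLAIN VECTORIZATION [ONLY] [SUMMARY|OPERATOR|EXPRESSION|DETAIL] grammar this patch introduces (SUMMARY being the default level, and ONLY suppressing the ordinary plan sections so that only the vectorization annotations remain):

explain vectorization only detail
select * from t1 a left semi join t2 b on a.key = b.key sort by a.key, a.value;
-- DETAIL asks for per-expression vectorization information; dropping ONLY
-- would interleave it with the full operator plan.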
diff --git ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q
index c9e9e48..7701bcd 100644
--- ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q
+++ ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q
@@ -9,7 +9,7 @@ SET hive.auto.convert.join=true;
 -- Query copied from subquery_in.q
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization detail
 select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
@@ -23,7 +23,7 @@ where li.l_linenumber = 1 and
 ;
 -- non agg, corr, with join in Parent Query
-explain
+explain vectorization detail
 select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
diff --git ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q
index 1f17669..0e9bcf9 100644
--- ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q
+++ ql/src/test/queries/clientpositive/vector_mr_diff_schema_alias.q
@@ -104,7 +104,7 @@ stored as orc;
 -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
 -- the 2 TableScanOperators that have different schema.
-explain select
+explain vectorization select
 s_state, count(1)
 from store_sales,
 store,
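The comment in vector_mr_diff_schema_alias.q carries the expectation: on MR, one map task can host both TableScanOperators, and two scans with different row schemas cannot share a single vectorized batch layout. A hedged way to read the result (the contrast query below is illustrative, not part of the test):

explain vectorization
select count(1) from store_sales;
-- A single-scan query like this is eligible to vectorize, while the Map
-- vertex that merges the store_sales and store scans above should report
-- that it is not vectorized (exact message text depends on the build).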
diff --git ql/src/test/queries/clientpositive/vector_multi_insert.q ql/src/test/queries/clientpositive/vector_multi_insert.q
index 374a0da..11cad1e 100644
--- ql/src/test/queries/clientpositive/vector_multi_insert.q
+++ ql/src/test/queries/clientpositive/vector_multi_insert.q
@@ -22,7 +22,7 @@ create table orc_rn3 (rn int);
 analyze table orc1 compute statistics;
-explain from orc1 a
+explain vectorization from orc1 a
 insert overwrite table orc_rn1 select a.* where a.rn < 100
 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
 insert overwrite table orc_rn3 select a.* where a.rn >= 1000;
diff --git ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q
index 69142bf..113ea7f 100644
--- ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q
+++ ql/src/test/queries/clientpositive/vector_non_constant_in_expr.q
@@ -1,4 +1,4 @@
 SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
-explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint);
\ No newline at end of file
+explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint);
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_non_string_partition.q ql/src/test/queries/clientpositive/vector_non_string_partition.q
index bffc93e..7135ad5 100644
--- ql/src/test/queries/clientpositive/vector_non_string_partition.q
+++ ql/src/test/queries/clientpositive/vector_non_string_partition.q
@@ -10,10 +10,10 @@ WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble;
 SHOW PARTITIONS non_string_part;
-EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+EXPLAIN VECTORIZATION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
 SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
-EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
+EXPLAIN VECTORIZATION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
 SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_null_projection.q ql/src/test/queries/clientpositive/vector_null_projection.q
index 66c0838..286a10d 100644
--- ql/src/test/queries/clientpositive/vector_null_projection.q
+++ ql/src/test/queries/clientpositive/vector_null_projection.q
@@ -10,12 +10,12 @@ insert into table a values('aaa');
 insert into table b values('aaa');
 -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization detail
 select NULL from a;
 select NULL from a;
-explain
+explain vectorization detail
 select NULL as x from a union distinct select NULL as x from b;
 select NULL as x from a union distinct select NULL as x from b;
\ No newline at end of file
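vector_null_projection.q states its own reasoning: a bare NULL literal projects Hive's void type, for which no vectorized expression exists, so the explain output should show the query was not vectorized. A hedged illustration, not part of the test: giving the literal a concrete type removes the void projection, though whether the rewrite then vectorizes still depends on expression support in the build under test.

explain vectorization detail
select cast(NULL as string) from a;
-- the projected type is now string rather than void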
diff --git ql/src/test/queries/clientpositive/vector_nullsafe_join.q ql/src/test/queries/clientpositive/vector_nullsafe_join.q
index b316a54..9c9e4ce 100644
--- ql/src/test/queries/clientpositive/vector_nullsafe_join.q
+++ ql/src/test/queries/clientpositive/vector_nullsafe_join.q
@@ -20,19 +20,19 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
 SET hive.vectorized.execution.mapjoin.native.enabled=false;
 -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 -- outer joins
@@ -47,19 +47,19 @@ SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
 select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value;
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
+explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value;
 -- outer joins
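The vector_nullsafe_join.q cases center on the null-safe equality operator <=>, which treats two NULLs as equal rather than returning NULL; that is what lets the myinput1 self-joins above match rows whose key or value is NULL. A one-row semantic sketch:

select
  1 <=> 1,        -- true
  NULL <=> NULL,  -- true: null-safe equality matches NULL with NULL
  NULL = NULL     -- NULL: ordinary equality is unknown on NULLs
from myinput1 limit 1;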
diff --git ql/src/test/queries/clientpositive/vector_number_compare_projection.q ql/src/test/queries/clientpositive/vector_number_compare_projection.q
index feb5e98..29a2a74 100644
--- ql/src/test/queries/clientpositive/vector_number_compare_projection.q
+++ ql/src/test/queries/clientpositive/vector_number_compare_projection.q
@@ -35,7 +35,7 @@ SET hive.vectorized.execution.enabled=true;
 --
 -- Projection LongColLongScalar
 --
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT sum(hash(*)) FROM
 (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc
 order by t, si, i) as q;
@@ -44,7 +44,7 @@ SELECT sum(hash(*)) FROM
 (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc
 order by t, si, i) as q;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT sum(hash(*)) FROM
 (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc
 order by t, si, i, b) as q;
diff --git ql/src/test/queries/clientpositive/vector_nvl.q ql/src/test/queries/clientpositive/vector_nvl.q
index 742bf52..c3fae7c 100644
--- ql/src/test/queries/clientpositive/vector_nvl.q
+++ ql/src/test/queries/clientpositive/vector_nvl.q
@@ -1,7 +1,7 @@
 SET hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+EXPLAIN VECTORIZATION SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10;
@@ -11,7 +11,7 @@ FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10;
-EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n
+EXPLAIN VECTORIZATION SELECT cfloat, nvl(cfloat, 1) as n
 FROM alltypesorc
 LIMIT 10;
@@ -19,7 +19,7 @@ SELECT cfloat, nvl(cfloat, 1) as n
 FROM alltypesorc
 LIMIT 10;
-EXPLAIN SELECT nvl(null, 10) as n
+EXPLAIN VECTORIZATION SELECT nvl(null, 10) as n
 FROM alltypesorc
 LIMIT 10;
@@ -27,7 +27,7 @@ SELECT nvl(null, 10) as n
 FROM alltypesorc
 LIMIT 10;
-EXPLAIN SELECT nvl(null, null) as n
+EXPLAIN VECTORIZATION SELECT nvl(null, null) as n
 FROM alltypesorc
 LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_orderby_5.q ql/src/test/queries/clientpositive/vector_orderby_5.q
index 30bcaef..0798209 100644
--- ql/src/test/queries/clientpositive/vector_orderby_5.q
+++ ql/src/test/queries/clientpositive/vector_orderby_5.q
@@ -39,7 +39,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization detail
 select bo, max(b) from vectortab2korc group by bo order by bo desc;
 select bo, max(b) from vectortab2korc group by bo order by bo desc;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_outer_join0.q ql/src/test/queries/clientpositive/vector_outer_join0.q
index dce3a1b..3d632a6 100644
--- ql/src/test/queries/clientpositive/vector_outer_join0.q
+++ ql/src/test/queries/clientpositive/vector_outer_join0.q
@@ -12,14 +12,14 @@ insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null,
 select * from orc_table_1;
 select * from orc_table_2;
-explain
+explain vectorization only
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c;
 -- SORT_QUERY_RESULTS
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c;
-explain
+explain vectorization only
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c;
 -- SORT_QUERY_RESULTS
diff --git ql/src/test/queries/clientpositive/vector_outer_join1.q ql/src/test/queries/clientpositive/vector_outer_join1.q
index 4a36452..da6ec97 100644
--- ql/src/test/queries/clientpositive/vector_outer_join1.q
+++ ql/src/test/queries/clientpositive/vector_outer_join1.q
@@ -28,7 +28,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_a;
-explain
+explain vectorization only
 select *
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -41,7 +41,7 @@ from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
 on cd.cint = c.cint;
-explain
+explain vectorization only
 select c.ctinyint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a hd
@@ -54,7 +54,7 @@ from small_alltypesorc_a c
 left outer join small_alltypesorc_a hd
 on hd.ctinyint = c.ctinyint;
-explain
+explain vectorization only
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join2.q ql/src/test/queries/clientpositive/vector_outer_join2.q
index d3b5805..9a92548 100644
--- ql/src/test/queries/clientpositive/vector_outer_join2.q
+++ ql/src/test/queries/clientpositive/vector_outer_join2.q
@@ -28,7 +28,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_a;
-explain
+explain vectorization only
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join3.q ql/src/test/queries/clientpositive/vector_outer_join3.q
index e5fc0a9..9b02be9 100644
--- ql/src/test/queries/clientpositive/vector_outer_join3.q
+++ ql/src/test/queries/clientpositive/vector_outer_join3.q
@@ -27,7 +27,7 @@ ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS;
 ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_a;
-explain
+explain vectorization only formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -47,7 +47,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -67,7 +67,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select c.cstring1
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join4.q ql/src/test/queries/clientpositive/vector_outer_join4.q
index 45461b5..f6f1f8a 100644
--- ql/src/test/queries/clientpositive/vector_outer_join4.q
+++ ql/src/test/queries/clientpositive/vector_outer_join4.q
@@ -28,7 +28,7 @@ ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS;
 select * from small_alltypesorc_b;
-explain
+explain vectorization only formatted
 select *
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
@@ -41,7 +41,7 @@ from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
 on cd.cint = c.cint;
-explain
+explain vectorization only formatted
 select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b hd
@@ -54,7 +54,7 @@ from small_alltypesorc_b c
 left outer join small_alltypesorc_b hd
 on hd.ctinyint = c.ctinyint;
-explain
+explain vectorization only formatted
 select count(*) from (select c.ctinyint
 from small_alltypesorc_b c
 left outer join small_alltypesorc_b cd
diff --git ql/src/test/queries/clientpositive/vector_outer_join5.q ql/src/test/queries/clientpositive/vector_outer_join5.q
index 18b9ab4..17844cb 100644
--- ql/src/test/queries/clientpositive/vector_outer_join5.q
+++ ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -22,7 +22,7 @@ as orc as select ctinyint, cbigint from alltypesorc limit 100;
 ANALYZE TABLE small_table COMPUTE STATISTICS;
 ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS;
-explain
+explain vectorization only formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -35,7 +35,7 @@ left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -48,7 +48,7 @@ left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -61,7 +61,7 @@ left outer join small_table sm
 on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
 from sorted_mod_4 s
 left outer join small_table sm
@@ -74,7 +74,7 @@ left outer join small_table sm
 on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.*, sm.*, s2.*
 from sorted_mod_4 s
 left outer join small_table sm
@@ -105,7 +105,7 @@ as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc lim
 ANALYZE TABLE small_table2 COMPUTE STATISTICS;
 ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS;
-explain
+explain vectorization only formatted
 select count(*) from (select s.*, st.*
 from mod_8_mod_4 s
 left outer join small_table2 st
@@ -118,7 +118,7 @@ left outer join small_table2 st
 on s.cmodtinyint = st.cmodtinyint
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -131,7 +131,7 @@ left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -144,7 +144,7 @@ left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
 ) t1;
-explain
+explain vectorization only formatted
 select count(*)
 from (select s.cmodtinyint, s.cmodint, sm.cbigint
 from mod_8_mod_4 s
 left outer join small_table2 sm
@@ -157,7 +157,7 @@ left outer join small_table2 sm
 on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
 ) t1;
-explain
+explain vectorization only formatted
 select count(*) from (select s.*, sm.*, s2.*
 from mod_8_mod_4 s
 left outer join small_table2 sm
diff --git ql/src/test/queries/clientpositive/vector_outer_join6.q ql/src/test/queries/clientpositive/vector_outer_join6.q
index 06fa385..1fcaa4e 100644
--- ql/src/test/queries/clientpositive/vector_outer_join6.q
+++ ql/src/test/queries/clientpositive/vector_outer_join6.q
@@ -28,14 +28,14 @@ create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt;
 create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt;
 create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt;
-explain
+explain vectorization only formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
 (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
 (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
-explain
+explain vectorization only formatted
 select tj1rnum, tj2rnum as rnumt3 from
 (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1;
diff --git ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
index f25374d..8c526de 100644
--- ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
+++ ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
@@ -27,7 +27,7 @@ partitioned by (par string) stored as orc;
 insert into table inventory_part_0 partition(par='1') select * from inventory_txt;
 insert into table inventory_part_0 partition(par='2') select * from inventory_txt;
-explain
+explain vectorization detail
 select sum(inv_quantity_on_hand) from inventory_part_0;
 select sum(inv_quantity_on_hand) from inventory_part_0;
@@ -47,7 +47,7 @@ alter table inventory_part_1 add columns (fifthcol string);
 insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt;
-explain
+explain vectorization detail
 select sum(inv_quantity_on_hand) from inventory_part_1;
 select sum(inv_quantity_on_hand) from inventory_part_1;
@@ -66,7 +66,7 @@ insert into table inventory_part_2a partition(par='1') select * from inventory_t
 insert into table inventory_part_2a partition(par='2') select * from inventory_txt;
 alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int;
-explain
+explain vectorization detail
 select sum(inv_quantity_on_hand) from inventory_part_2a;
 create table inventory_part_2b(
@@ -80,7 +80,7 @@ insert into table inventory_part_2b partition(par1='1',par2=4) select * from inv
 insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt;
 alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int;
-explain
+explain vectorization detail
 select sum(inv_quantity_on_hand) from inventory_part_2b;
 -- Verify we do not vectorize when a partition column type is different.
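The comment above gives the rationale for the inventory_part_2a/2b cases: once a partition's column type diverges from the table schema, one column-vector layout no longer fits every partition, and the expectation is that Hive declines to vectorize the scan. A minimal sketch of the same pattern, using a hypothetical table name (t_part) rather than the test's tables:

create table t_part (c int) partitioned by (par string) stored as orc;
insert into table t_part partition (par='1') values (1);
alter table t_part partition (par='1') change c c bigint;  -- partition type now differs from table schema
explain vectorization detail
select sum(c) from t_part;  -- expected to show a non-vectorized scan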
@@ -97,5 +97,5 @@ insert into table inventory_part_3 partition(par='1') select * from inventory_tx
 insert into table inventory_part_3 partition(par='2') select * from inventory_txt;
 alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint;
-explain
+explain vectorization detail
 select sum(inv_quantity_on_hand) from inventory_part_3;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_partitioned_date_time.q ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
index f53d8c0..bd55e3a 100644
--- ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
+++ ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
@@ -32,12 +32,12 @@ select fl_date, count(*) from flights_tiny_orc group by fl_date;
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
-explain
+explain vectorization detail
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
@@ -71,17 +71,17 @@ select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_date;
 select * from flights_tiny_orc_partitioned_date;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
-explain
+explain vectorization detail
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
@@ -115,17 +115,17 @@ select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_timestamp;
 select * from flights_tiny_orc_partitioned_timestamp;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
-explain
+explain vectorization detail
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
diff --git ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
index c157df1..51c09b7 100644
--- ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
+++ ql/src/test/queries/clientpositive/vector_partitioned_date_time_win.q
@@ -30,12 +30,12 @@ select fl_date, count(*) from flights_tiny_orc group by fl_date;
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
-explain
+explain vectorization detail
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
 select fl_date, count(*) from flights_tiny_orc group by fl_date;
@@ -69,17 +69,17 @@ select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_date;
 select * from flights_tiny_orc_partitioned_date;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
 select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
-explain
+explain vectorization detail
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
 select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
@@ -113,17 +113,17 @@ select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl
 SET hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_timestamp;
 select * from flights_tiny_orc_partitioned_timestamp;
-explain
+explain vectorization detail
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
 select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
-explain
+explain vectorization detail
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
 select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
diff --git ql/src/test/queries/clientpositive/vector_reduce1.q ql/src/test/queries/clientpositive/vector_reduce1.q
index cfd803f..4a638d2 100644
--- ql/src/test/queries/clientpositive/vector_reduce1.q
+++ ql/src/test/queries/clientpositive/vector_reduce1.q
@@ -42,7 +42,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization detail
 select b from vectortab2korc order by b;
 select b from vectortab2korc order by b;
diff --git ql/src/test/queries/clientpositive/vector_reduce2.q ql/src/test/queries/clientpositive/vector_reduce2.q
index ab67132..e5e580b 100644
--- ql/src/test/queries/clientpositive/vector_reduce2.q
+++ ql/src/test/queries/clientpositive/vector_reduce2.q
@@ -42,7 +42,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization detail
 select s, i, s2 from vectortab2korc order by s, i, s2;
 select s, i, s2 from vectortab2korc order by s, i, s2;
diff --git ql/src/test/queries/clientpositive/vector_reduce3.q ql/src/test/queries/clientpositive/vector_reduce3.q
index bf8206f..03ff19d 100644
--- ql/src/test/queries/clientpositive/vector_reduce3.q
+++ ql/src/test/queries/clientpositive/vector_reduce3.q
@@ -42,7 +42,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-explain
+explain vectorization detail
 select s from vectortab2korc order by s;
 select s from vectortab2korc order by s;
diff --git ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
index 4a50150..ab51b3a 100644
--- ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
+++ ql/src/test/queries/clientpositive/vector_reduce_groupby_decimal.q
@@ -4,7 +4,7 @@ WHERE cint is not null and cdouble is not null;
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test
 WHERE cdecimal1 is not null and cdecimal2 is not null
 GROUP BY cint, cdouble, cdecimal1, cdecimal2
diff --git ql/src/test/queries/clientpositive/vector_string_concat.q ql/src/test/queries/clientpositive/vector_string_concat.q
index f3a5965..cf53671 100644
--- ql/src/test/queries/clientpositive/vector_string_concat.q
+++ ql/src/test/queries/clientpositive/vector_string_concat.q
@@ -37,7 +37,7 @@ STORED AS ORC;
 INSERT INTO TABLE over1korc SELECT * FROM over1k;
-EXPLAIN SELECT s AS `string`,
+EXPLAIN VECTORIZATION SELECT s AS `string`,
 CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`,
 CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str`
 FROM over1korc LIMIT 20;
@@ -86,7 +86,7 @@ STORED AS ORC;
 INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field`
 FROM vectortab2korc
 GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING))
diff --git ql/src/test/queries/clientpositive/vector_string_decimal.q ql/src/test/queries/clientpositive/vector_string_decimal.q
index e69cd77..48e48ee 100644
--- ql/src/test/queries/clientpositive/vector_string_decimal.q
+++ ql/src/test/queries/clientpositive/vector_string_decimal.q
@@ -13,7 +13,7 @@ insert overwrite table orc_decimal select id from staging;
 set hive.vectorized.execution.enabled=true;
-explain
+explain vectorization detail
 select * from orc_decimal where id in ('100000000', '200000000');
 select * from orc_decimal where id in ('100000000', '200000000');
diff --git ql/src/test/queries/clientpositive/vector_struct_in.q ql/src/test/queries/clientpositive/vector_struct_in.q
index 50487db..06dd4b8 100644
--- ql/src/test/queries/clientpositive/vector_struct_in.q
+++ ql/src/test/queries/clientpositive/vector_struct_in.q
@@ -11,7 +11,7 @@ create table test_1 (`id` string, `lineid` string) stored as orc;
 insert into table test_1 values ('one','1'), ('seven','1');
-explain
+explain vectorization detail
 select * from test_1 where struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -38,7 +38,7 @@ struct('nine','1'),
 struct('ten','1')
 );
-explain
+explain vectorization detail
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two','3'),
@@ -71,7 +71,7 @@ create table test_2 (`id` int, `lineid` int) stored as orc;
 insert into table test_2 values (1,1), (7,1);
-explain
+explain vectorization detail
 select * from test_2 where struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -98,7 +98,7 @@ struct(9,1),
 struct(10,1)
 );
-explain
+explain vectorization detail
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct(2,3),
@@ -130,7 +130,7 @@ create table test_3 (`id` string, `lineid` int) stored as orc;
 insert into table test_3 values ('one',1), ('seven',1);
-explain
+explain vectorization detail
 select * from test_3 where struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -157,7 +157,7 @@ struct('nine',1),
 struct('ten',1)
 );
-explain
+explain vectorization detail
 select `id`, `lineid`, struct(`id`, `lineid`)
 IN (
 struct('two',3),
@@ -189,7 +189,7 @@ create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double)
 insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5);
-explain
+explain vectorization detail
 select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
@@ -218,7 +218,7 @@ struct(1L, "a", 0.5D),
 struct(3L, "b", 1.5D)
 );
-explain
+explain vectorization detail
 select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
 IN (
 struct(1L, "a", 1.5D),
diff --git ql/src/test/queries/clientpositive/vector_tablesample_rows.q ql/src/test/queries/clientpositive/vector_tablesample_rows.q
index 4deb1c8..bb3c5a4 100644
--- ql/src/test/queries/clientpositive/vector_tablesample_rows.q
+++ ql/src/test/queries/clientpositive/vector_tablesample_rows.q
@@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
 set hive.mapred.mode=nonstrict;
-explain
+explain vectorization detail
 select 'key1', 'value1' from alltypesorc tablesample (1 rows);
 select 'key1', 'value1' from alltypesorc tablesample (1 rows);
@@ -12,7 +12,7 @@ select 'key1', 'value1' from alltypesorc tablesample (1 rows);
 create table decimal_2 (t decimal(18,9)) stored as orc;
-explain
+explain vectorization detail
 insert overwrite table decimal_2
 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows);
@@ -25,12 +25,12 @@ drop table decimal_2;
 -- Dummy tables HIVE-13190
-explain
+explain vectorization detail
 select count(1) from (select * from (Select 1 a) x order by x.a) y;
 select count(1) from (select * from (Select 1 a) x order by x.a) y;
-explain
+explain vectorization detail
 create temporary table dual as select 1;
 create temporary table dual as select 1;
diff --git ql/src/test/queries/clientpositive/vector_udf1.q ql/src/test/queries/clientpositive/vector_udf1.q
index 2fcc69b..2eab0bd 100644
--- ql/src/test/queries/clientpositive/vector_udf1.q
+++ ql/src/test/queries/clientpositive/vector_udf1.q
@@ -8,7 +8,7 @@ insert overwrite table varchar_udf_1
 select key, value, key, value from src where key = '238' limit 1;
 -- UDFs with varchar support
-explain
+explain vectorization detail
 select
 concat(c1, c2),
 concat(c3, c4),
@@ -21,7 +21,7 @@ select
 concat(c1, c2) = concat(c3, c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 upper(c2),
 upper(c4),
@@ -34,7 +34,7 @@ select
 upper(c2) = upper(c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 lower(c2),
 lower(c4),
@@ -48,7 +48,7 @@ select
 from varchar_udf_1 limit 1;
 -- Scalar UDFs
-explain
+explain vectorization detail
 select
 ascii(c2),
 ascii(c4),
@@ -61,7 +61,7 @@ select
 ascii(c2) = ascii(c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 concat_ws('|', c1, c2),
 concat_ws('|', c3, c4),
@@ -74,7 +74,7 @@ select
 concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
 decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
@@ -87,7 +87,7 @@ select
 decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 instr(c2, '_'),
 instr(c4, '_'),
@@ -100,7 +100,7 @@ select
 instr(c2, '_') = instr(c4, '_')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 length(c2),
 length(c4),
@@ -113,7 +113,7 @@ select
 length(c2) = length(c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 locate('a', 'abcdabcd', 3),
 locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3),
@@ -126,7 +126,7 @@ select
 locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 lpad(c2, 15, ' '),
 lpad(c4, 15, ' '),
@@ -139,7 +139,7 @@ select
 lpad(c2, 15, ' ') = lpad(c4, 15, ' ')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 ltrim(c2),
 ltrim(c4),
@@ -152,7 +152,7 @@ select
 ltrim(c2) = ltrim(c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 c2 regexp 'val',
 c4 regexp 'val',
@@ -165,7 +165,7 @@ select
 (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 regexp_extract(c2, 'val_([0-9]+)', 1),
 regexp_extract(c4, 'val_([0-9]+)', 1),
@@ -178,7 +178,7 @@ select
 regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 regexp_replace(c2, 'val', 'replaced'),
 regexp_replace(c4, 'val', 'replaced'),
@@ -191,7 +191,7 @@ select
 regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 reverse(c2),
 reverse(c4),
@@ -204,7 +204,7 @@ select
 reverse(c2) = reverse(c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 rpad(c2, 15, ' '),
 rpad(c4, 15, ' '),
@@ -217,7 +217,7 @@ select
 rpad(c2, 15, ' ') = rpad(c4, 15, ' ')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 rtrim(c2),
 rtrim(c4),
@@ -230,7 +230,7 @@ select
 rtrim(c2) = rtrim(c4)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 sentences('See spot run. See jane run.'),
 sentences(cast('See spot run. See jane run.' as varchar(50)))
@@ -241,7 +241,7 @@ select
 sentences(cast('See spot run. See jane run.' as varchar(50)))
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 split(c2, '_'),
 split(c4, '_')
@@ -252,7 +252,7 @@ select
 split(c4, '_')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 str_to_map('a:1,b:2,c:3',',',':'),
 str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':')
@@ -263,7 +263,7 @@ select
 str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':')
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 substr(c2, 1, 3),
 substr(c4, 1, 3),
@@ -276,7 +276,7 @@ select
 substr(c2, 1, 3) = substr(c4, 1, 3)
 from varchar_udf_1 limit 1;
-explain
+explain vectorization detail
 select
 trim(c2),
 trim(c4),
@@ -291,7 +291,7 @@ from varchar_udf_1 limit 1;
 -- Aggregate Functions
-explain
+explain vectorization detail
 select
 compute_stats(c2, 16),
 compute_stats(c4, 16)
@@ -302,7 +302,7 @@ select
 compute_stats(c4, 16)
 from varchar_udf_1;
-explain
+explain vectorization detail
 select
 min(c2),
 min(c4)
@@ -313,7 +313,7 @@ select
 min(c4)
 from varchar_udf_1;
-explain
+explain vectorization detail
 select
 max(c2),
 max(c4)
diff --git ql/src/test/queries/clientpositive/vector_udf2.q ql/src/test/queries/clientpositive/vector_udf2.q
index b926c4f..01016b5 100644
--- ql/src/test/queries/clientpositive/vector_udf2.q
+++ ql/src/test/queries/clientpositive/vector_udf2.q
@@ -7,7 +7,7 @@ create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)
 insert overwrite table varchar_udf_2
 select key, value, key, value from src where key = '238' limit 1;
-explain
+explain vectorization detail
 select
 c1 LIKE '%38%',
 c2 LIKE 'val_%',
diff --git ql/src/test/queries/clientpositive/vector_udf3.q ql/src/test/queries/clientpositive/vector_udf3.q
index 8a4df79..d8f2060 100644
--- ql/src/test/queries/clientpositive/vector_udf3.q
+++ ql/src/test/queries/clientpositive/vector_udf3.q
@@ -4,7 +4,7 @@ CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13';
 set hive.vectorized.execution.enabled=true;
-EXPLAIN SELECT rot13(cstring1) from alltypesorc;
+EXPLAIN VECTORIZATION SELECT rot13(cstring1) from alltypesorc;
 SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10;
diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q
index 32a74a4..5ac556e 100644
--- ql/src/test/queries/clientpositive/vector_varchar_4.q
+++ ql/src/test/queries/clientpositive/vector_varchar_4.q
@@ -44,7 +44,7 @@ INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k;
 drop table if exists varchar_lazy_binary_columnar;
 create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile;
-explain
+explain vectorization detail
 insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc;
 -- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc;
diff --git ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
index ac0570e..285d2ac 100644
--- ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
+++ ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
@@ -37,15 +37,15 @@ create table varchar_join1_vc2_orc stored as orc as select * from varchar_join1_
 create table varchar_join1_str_orc stored as orc as select * from varchar_join1_str;
 -- Join varchar with same length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
 select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1;
 -- Join varchar with different length varchar
-explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
 select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1;
 -- Join varchar with string
-explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
+explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
 select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1;
 drop table varchar_join1_vc1;
diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q
index acd6598..b5960b5 100644
--- ql/src/test/queries/clientpositive/vector_varchar_simple.q
+++ ql/src/test/queries/clientpositive/vector_varchar_simple.q
@@ -14,7 +14,7 @@ from src
 order by key asc
 limit 5;
-explain select key, value
+explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5;
@@ -30,7 +30,7 @@ from src
 order by key desc
 limit 5;
-explain select key, value
+explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5;
@@ -48,7 +48,7 @@ create table varchar_3 (
 field varchar(25)
 ) stored as orc;
-explain
+explain vectorization detail
 insert into table varchar_3 select cint from alltypesorc limit 10;
 insert into table varchar_3 select cint from alltypesorc limit 10;
diff --git ql/src/test/queries/clientpositive/vector_when_case_null.q ql/src/test/queries/clientpositive/vector_when_case_null.q
index a423b60..5e1ac72 100644
--- ql/src/test/queries/clientpositive/vector_when_case_null.q
+++ ql/src/test/queries/clientpositive/vector_when_case_null.q
@@ -8,7 +8,7 @@ set hive.fetch.task.conversion=none;
 create table count_case_groupby (key string, bool boolean) STORED AS orc;
 insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL);
-explain
+explain vectorization detail
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
 SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vectorization_0.q ql/src/test/queries/clientpositive/vectorization_0.q
index caa6a6a..813c910 100644
--- ql/src/test/queries/clientpositive/vectorization_0.q
+++ ql/src/test/queries/clientpositive/vectorization_0.q
@@ -5,7 +5,7 @@ SET hive.vectorized.execution.enabled=true;
 -- SORT_QUERY_RESULTS
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ctinyint) as c1,
 MAX(ctinyint),
 COUNT(ctinyint),
@@ -20,7 +20,7 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -29,7 +29,7 @@ SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
 avg(ctinyint) as c1,
 variance(ctinyint),
@@ -54,7 +54,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cbigint) as c1,
 MAX(cbigint),
 COUNT(cbigint),
@@ -69,7 +69,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -78,7 +78,7 @@ SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
 avg(cbigint) as c1,
 variance(cbigint),
@@ -103,7 +103,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cfloat) as c1,
 MAX(cfloat),
 COUNT(cfloat),
@@ -118,7 +118,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1;
@@ -127,7 +127,7 @@ SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT
 avg(cfloat) as c1,
 variance(cfloat),
@@ -152,7 +152,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1;
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT AVG(cbigint),
 (-(AVG(cbigint))),
 (-6432 + AVG(cbigint)),
diff --git ql/src/test/queries/clientpositive/vectorization_13.q ql/src/test/queries/clientpositive/vectorization_13.q
index 005808b..9ad13be 100644
--- ql/src/test/queries/clientpositive/vectorization_13.q
+++ ql/src/test/queries/clientpositive/vectorization_13.q
@@ -5,7 +5,7 @@ set hive.fetch.task.conversion=minimal;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
 ctinyint,
 ctimestamp1,
@@ -71,7 +71,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5,
 LIMIT 40;
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
 ctinyint,
 ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_14.q ql/src/test/queries/clientpositive/vectorization_14.q
index 4796c18..c4a6b89 100644
--- ql/src/test/queries/clientpositive/vectorization_14.q
+++ ql/src/test/queries/clientpositive/vectorization_14.q
@@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT ctimestamp1,
 cfloat,
 cstring1,
diff --git ql/src/test/queries/clientpositive/vectorization_15.q ql/src/test/queries/clientpositive/vectorization_15.q
index 21ba8c8..90f98ec 100644
--- ql/src/test/queries/clientpositive/vectorization_15.q
+++ ql/src/test/queries/clientpositive/vectorization_15.q
@@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
 cboolean1,
 cdouble,
diff --git ql/src/test/queries/clientpositive/vectorization_16.q ql/src/test/queries/clientpositive/vectorization_16.q
index 11b709f..79541e4 100644
--- ql/src/test/queries/clientpositive/vectorization_16.q
+++ ql/src/test/queries/clientpositive/vectorization_16.q
@@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
 cdouble,
 ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_17.q ql/src/test/queries/clientpositive/vectorization_17.q
index 1306f6b..d05c17d 100644
--- ql/src/test/queries/clientpositive/vectorization_17.q
+++ ql/src/test/queries/clientpositive/vectorization_17.q
@@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cfloat,
 cstring1,
 cint,
diff --git ql/src/test/queries/clientpositive/vectorization_7.q ql/src/test/queries/clientpositive/vectorization_7.q
index 131f570..b877545 100644
--- ql/src/test/queries/clientpositive/vectorization_7.q
+++ ql/src/test/queries/clientpositive/vectorization_7.q
@@ -5,7 +5,7 @@ set hive.fetch.task.conversion=minimal;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
 cbigint,
 csmallint,
@@ -60,7 +60,7 @@ LIMIT 25;
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
 cbigint,
 csmallint,
diff --git ql/src/test/queries/clientpositive/vectorization_8.q ql/src/test/queries/clientpositive/vectorization_8.q
index 2d357f1..fe73641 100644
--- ql/src/test/queries/clientpositive/vectorization_8.q
+++ ql/src/test/queries/clientpositive/vectorization_8.q
@@ -5,7 +5,7 @@ set hive.fetch.task.conversion=minimal;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
 cdouble,
 cboolean1,
@@ -56,7 +56,7 @@ LIMIT 20;
 -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
 cdouble,
 cboolean1,
diff --git ql/src/test/queries/clientpositive/vectorization_9.q ql/src/test/queries/clientpositive/vectorization_9.q
index 11b709f..79541e4 100644
--- ql/src/test/queries/clientpositive/vectorization_9.q
+++ ql/src/test/queries/clientpositive/vectorization_9.q
@@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
 SELECT cstring1,
 cdouble,
 ctimestamp1,
diff --git ql/src/test/queries/clientpositive/vectorization_decimal_date.q ql/src/test/queries/clientpositive/vectorization_decimal_date.q
index 854ee20..36e5d0c 100644
--- ql/src/test/queries/clientpositive/vectorization_decimal_date.q
+++ ql/src/test/queries/clientpositive/vectorization_decimal_date.q
@@ -1,5 +1,5 @@
 set hive.explain.user=false;
 CREATE TABLE date_decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (CAST (cint AS TIMESTAMP) AS DATE) AS cdate, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal FROM alltypesorc;
 SET hive.vectorized.execution.enabled=true;
-EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
+EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
 SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vectorization_div0.q ql/src/test/queries/clientpositive/vectorization_div0.q
index 05d81d0..b0236e8 100644
--- ql/src/test/queries/clientpositive/vectorization_div0.q
+++ ql/src/test/queries/clientpositive/vectorization_div0.q
@@ -3,14 +3,14 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+explain vectorization detail
 select cdouble / 0.0 from alltypesorc limit 100;
 select cdouble / 0.0 from alltypesorc limit 100;
 -- There are no zeros in the table, but there is 988888, so use it as zero
 -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+explain vectorization detail
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100;
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
@@ -18,7 +18,7 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit
 -- There are no zeros in the table, but there is -200.0, so use it as zero
-explain
+explain vectorization detail
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100;
 select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
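The comments in vectorization_div0.q explain the data trick: alltypesorc contains no zeros, so the tests manufacture them by subtracting a value that does occur (988888L for cbigint, -200.0 for cdouble). The property being checked is that vectorized arithmetic agrees with row mode on division by zero, which in Hive yields NULL rather than an error:

select cdouble / 0.0 from alltypesorc limit 1;
-- expect NULL; the vectorized division expressions must produce the same
-- NULLs the row-mode executor does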
alltypesorc limit 20; -explain +explain vectorization detail select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20; select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20; -- limit zero -explain +explain vectorization detail select ctinyint,cdouble from alltypesorc order by ctinyint limit 0; select ctinyint,cdouble from alltypesorc order by ctinyint limit 0; -- 2MR (applied to last RS) -explain +explain vectorization detail select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20; select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20; diff --git ql/src/test/queries/clientpositive/vectorization_offset_limit.q ql/src/test/queries/clientpositive/vectorization_offset_limit.q index 3d01154..322a6b8 100644 --- ql/src/test/queries/clientpositive/vectorization_offset_limit.q +++ ql/src/test/queries/clientpositive/vectorization_offset_limit.q @@ -2,9 +2,9 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.mapred.mode=nonstrict; -explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2; +explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2; SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2; -explain +explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3; select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vectorization_part_project.q ql/src/test/queries/clientpositive/vectorization_part_project.q index 3a48f20..e45c2d1 100644 --- ql/src/test/queries/clientpositive/vectorization_part_project.q +++ ql/src/test/queries/clientpositive/vectorization_part_project.q @@ -5,5 +5,5 @@ CREATE TABLE alltypesorc_part(ctinyint tinyint, csmallint smallint, cint int, cb insert overwrite table alltypesorc_part partition (ds='2011') select * from alltypesorc order by ctinyint, cint, cbigint limit 100; insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc order by ctinyint, cint, cbigint limit 100; -explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10; +explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10; select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10; diff --git ql/src/test/queries/clientpositive/vectorization_pushdown.q ql/src/test/queries/clientpositive/vectorization_pushdown.q index b33cfa7..25e17f7 100644 --- ql/src/test/queries/clientpositive/vectorization_pushdown.q +++ ql/src/test/queries/clientpositive/vectorization_pushdown.q @@ -2,5 +2,5 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.optimize.index.filter=true; -explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble; +explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble; SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble; diff --git ql/src/test/queries/clientpositive/vectorization_short_regress.q ql/src/test/queries/clientpositive/vectorization_short_regress.q index 114a3e2..f817e3c 100644 --- 
ql/src/test/queries/clientpositive/vectorization_short_regress.q +++ ql/src/test/queries/clientpositive/vectorization_short_regress.q @@ -36,7 +36,8 @@ set hive.fetch.task.conversion=minimal; -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -112,7 +113,8 @@ WHERE ((762 = cbigint) -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -182,7 +184,8 @@ WHERE (((cbigint <= 197) -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -250,7 +253,8 @@ WHERE ((ctimestamp1 = ctimestamp2) -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -298,7 +302,8 @@ WHERE (((ctimestamp2 <= ctimestamp1) -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -376,7 +381,8 @@ LIMIT 50; -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -451,7 +457,8 @@ LIMIT 25; -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -524,7 +531,8 @@ LIMIT 75; -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -583,7 +591,8 @@ LIMIT 45; -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -628,7 +637,8 @@ LIMIT 20; -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -686,7 
+696,8 @@ ORDER BY cdouble; -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -801,7 +812,8 @@ LIMIT 50; -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -883,12 +895,12 @@ ORDER BY cboolean1; -- These tests verify COUNT on empty or null columns works correctly. create table test_count(i int) stored as orc; -explain +explain vectorization detail select count(*) from test_count; select count(*) from test_count; -explain +explain vectorization detail select count(i) from test_count; select count(i) from test_count; @@ -911,32 +923,32 @@ insert into table alltypesnull select null, null, null, null, null, null, null, create table alltypesnullorc stored as orc as select * from alltypesnull; -explain +explain vectorization detail select count(*) from alltypesnullorc; select count(*) from alltypesnullorc; -explain +explain vectorization detail select count(ctinyint) from alltypesnullorc; select count(ctinyint) from alltypesnullorc; -explain +explain vectorization detail select count(cint) from alltypesnullorc; select count(cint) from alltypesnullorc; -explain +explain vectorization detail select count(cfloat) from alltypesnullorc; select count(cfloat) from alltypesnullorc; -explain +explain vectorization detail select count(cstring1) from alltypesnullorc; select count(cstring1) from alltypesnullorc; -explain +explain vectorization detail select count(cboolean1) from alltypesnullorc; select count(cboolean1) from alltypesnullorc; diff --git ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q index 022ce2e..80f6fc8 100644 --- ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q +++ ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q @@ -28,11 +28,11 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; -explain +explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key; select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key; select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key; @@ -41,6 +41,6 @@ select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key; select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key; diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q index e74bf82..fda0f99 100644 ---
ql/src/test/queries/clientpositive/vectorized_case.q +++ ql/src/test/queries/clientpositive/vectorized_case.q @@ -2,7 +2,7 @@ set hive.explain.user=false; set hive.fetch.task.conversion=none; set hive.vectorized.execution.enabled = true ; -explain +explain vectorization detail select csmallint, case @@ -37,7 +37,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain +explain vectorization detail select csmallint, case diff --git ql/src/test/queries/clientpositive/vectorized_casts.q ql/src/test/queries/clientpositive/vectorized_casts.q index 0880e29..a32c150 100644 --- ql/src/test/queries/clientpositive/vectorized_casts.q +++ ql/src/test/queries/clientpositive/vectorized_casts.q @@ -8,7 +8,7 @@ SET hive.vectorized.execution.enabled = true; -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select -- to boolean cast (ctinyint as boolean) diff --git ql/src/test/queries/clientpositive/vectorized_context.q ql/src/test/queries/clientpositive/vectorized_context.q index 657270e..d63cdb5 100644 --- ql/src/test/queries/clientpositive/vectorized_context.q +++ ql/src/test/queries/clientpositive/vectorized_context.q @@ -26,7 +26,7 @@ set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; -explain +explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q index 7d7b1cf..f58def7 100644 --- ql/src/test/queries/clientpositive/vectorized_date_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q @@ -24,7 +24,7 @@ INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date, SELECT * FROM date_udf_flight_orc; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -54,7 +54,7 @@ SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -84,7 +84,7 @@ SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -113,7 +113,7 @@ SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -134,7 +134,7 @@ FROM date_udf_flight_orc LIMIT 10; -- Test extracting the date part of expression that includes time SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), diff --git ql/src/test/queries/clientpositive/vectorized_distinct_gby.q ql/src/test/queries/clientpositive/vectorized_distinct_gby.q index 6900dc0..faf3415 100644 --- ql/src/test/queries/clientpositive/vectorized_distinct_gby.q +++ ql/src/test/queries/clientpositive/vectorized_distinct_gby.q @@ -7,8 +7,8 @@ SET hive.map.groupby.sorted=true; create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc; insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 
1) y lateral view explode(a) t1 as c; -explain select sum(distinct a), count(distinct a) from dtest; +explain vectorization select sum(distinct a), count(distinct a) from dtest; select sum(distinct a), count(distinct a) from dtest; -explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc; +explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc; select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc; diff --git ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q index 2d3788d..c53c024 100644 --- ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q +++ ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q @@ -12,28 +12,28 @@ set hive.vectorized.execution.enabled=true; select distinct ds from srcpart; select distinct hr from srcpart; -EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; +EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds; create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr; create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr; create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr; -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08'; -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; @@ -41,77 +41,77 @@ set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = 'I DONT EXIST'; -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where 
srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where cast(hr as string) = 11; -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); -EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from 
srcpart); +EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); set hive.auto.convert.join=true; @@ -119,60 +119,60 @@ set hive.auto.convert.join.noconditionaltask = true; set hive.auto.convert.join.noconditionaltask.size = 10000000; -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where 
srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart where hr = 11; set hive.stats.fetch.column.stats=false; -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; set hive.stats.fetch.column.stats=true; -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from 
srcpart); @@ -185,7 +185,7 @@ set hive.vectorized.execution.enabled=false; set hive.exec.max.dynamic.partitions=1000; insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart; -EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11; diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin.q ql/src/test/queries/clientpositive/vectorized_mapjoin.q index 6500d41..921485a 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin.q +++ ql/src/test/queries/clientpositive/vectorized_mapjoin.q @@ -7,7 +7,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint; diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin2.q ql/src/test/queries/clientpositive/vectorized_mapjoin2.q index 137acbc..c04d1b7 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin2.q +++ ql/src/test/queries/clientpositive/vectorized_mapjoin2.q @@ -15,7 +15,7 @@ create temporary table y (b int) stored as orc; insert into x values(1); insert into y values(1); -explain +explain vectorization detail select count(1) from x, y where a = b; select count(1) from x, y where a = b; diff --git ql/src/test/queries/clientpositive/vectorized_math_funcs.q ql/src/test/queries/clientpositive/vectorized_math_funcs.q index d79fcce..9062b6e 100644 --- ql/src/test/queries/clientpositive/vectorized_math_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_math_funcs.q @@ -3,7 +3,7 @@ SET hive.vectorized.execution.enabled = true; -- Test math functions in vectorized mode to verify they run correctly end-to-end. 
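The .q files in this patch exercise several spellings of the new statement: EXPLAIN VECTORIZATION, EXPLAIN VECTORIZATION DETAIL, EXPLAIN VECTORIZATION ONLY DETAIL, and, in the PTF tests below, explain vectorization extended. As a minimal, hedged sketch of the end-to-end pattern the math-function hunk below follows (first confirm the plan vectorizes, then run the same query for its results), assuming only the alltypesorc table and the settings already used throughout this patch:

SET hive.explain.user=false;
SET hive.vectorized.execution.enabled = true;

-- Plain form, as most .q files above use it.
EXPLAIN VECTORIZATION
SELECT cdouble, round(cdouble, 2), floor(cdouble), ceil(cdouble)
FROM alltypesorc
WHERE cbigint % 500 = 0;

-- DETAIL form, which the .q.out hunks below show adding per-operator
-- and per-expression vectorization annotations to the plan.
EXPLAIN VECTORIZATION DETAIL
SELECT cdouble, round(cdouble, 2), floor(cdouble), ceil(cdouble)
FROM alltypesorc
WHERE cbigint % 500 = 0;

-- Run the query itself to verify results end to end.
SELECT cdouble, round(cdouble, 2), floor(cdouble), ceil(cdouble)
FROM alltypesorc
WHERE cbigint % 500 = 0;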
-explain +explain vectorization select cdouble ,Round(cdouble, 2) diff --git ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q index 4332898..1b9b69d 100644 --- ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q +++ ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q @@ -7,6 +7,6 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; diff --git ql/src/test/queries/clientpositive/vectorized_parquet.q ql/src/test/queries/clientpositive/vectorized_parquet.q index da138e0..e6ebdaa 100644 --- ql/src/test/queries/clientpositive/vectorized_parquet.q +++ ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -21,7 +21,7 @@ insert overwrite table alltypes_parquet SET hive.vectorized.execution.enabled=true; -explain select * +explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10; @@ -30,7 +30,7 @@ select * where cint = 528534767 limit 10; -explain select ctinyint, +explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), diff --git ql/src/test/queries/clientpositive/vectorized_parquet_types.q ql/src/test/queries/clientpositive/vectorized_parquet_types.q index 297c5af..80070f6 100644 --- ql/src/test/queries/clientpositive/vectorized_parquet_types.q +++ ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -48,19 +48,19 @@ SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, unhex(cbinary), cdecimal FROM parquet_types_staging; -- select -explain +explain vectorization detail SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; -explain +explain vectorization detail SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; -explain +explain vectorization detail SELECT ctinyint, MAX(cint), MIN(csmallint), diff --git ql/src/test/queries/clientpositive/vectorized_ptf.q ql/src/test/queries/clientpositive/vectorized_ptf.q index 64082e9..e1eacb0 100644 --- ql/src/test/queries/clientpositive/vectorized_ptf.q +++ ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -42,7 +42,7 @@ insert into table part_orc select * from part_staging; --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -63,7 +63,7 @@ from noop(on part_orc -- 2. 
testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -80,7 +80,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -93,7 +93,7 @@ order by p_name); -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +114,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -137,7 +137,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,7 +162,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -177,7 +177,7 @@ order by p_name -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -192,7 +192,7 @@ order by p_name -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -207,7 +207,7 @@ order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -226,7 +226,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -247,7 +247,7 @@ order by p_name) -- 12. testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -268,7 +268,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -295,7 +295,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -322,7 +322,7 @@ order by p_name -- 15. 
testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -341,7 +341,7 @@ sum(p_retailprice) as s from part_orc group by p_mfgr, p_brand; -explain extended +explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -375,7 +375,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain extended +explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -412,7 +412,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -447,7 +447,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -482,7 +482,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -514,7 +514,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -549,7 +549,7 @@ from noopwithmap(on -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -582,7 +582,7 @@ from noop(on -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/vectorized_shufflejoin.q ql/src/test/queries/clientpositive/vectorized_shufflejoin.q index f57d062..1028536 100644 --- ql/src/test/queries/clientpositive/vectorized_shufflejoin.q +++ ql/src/test/queries/clientpositive/vectorized_shufflejoin.q @@ -5,7 +5,7 @@ SET hive.auto.convert.join=false; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT; diff --git ql/src/test/queries/clientpositive/vectorized_string_funcs.q ql/src/test/queries/clientpositive/vectorized_string_funcs.q index d04a3c3..7376962 100644 --- ql/src/test/queries/clientpositive/vectorized_string_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_string_funcs.q @@ -3,7 +3,7 @@ SET hive.vectorized.execution.enabled = true; -- Test string functions in vectorized mode to verify end-to-end functionality. 
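A similar hedged sketch for the string-function hunk that follows; substr, upper, lower, and length are standard Hive UDFs, and alltypesorc is the same test table used above (the filter predicate here is illustrative only):

SET hive.explain.user=false;
SET hive.vectorized.execution.enabled = true;

EXPLAIN VECTORIZATION
SELECT substr(cstring1, 1, 2),
       upper(cstring1),
       lower(cstring1),
       length(cstring1)
FROM alltypesorc
WHERE cbigint % 237 = 0;

SELECT substr(cstring1, 1, 2),
       upper(cstring1),
       lower(cstring1),
       length(cstring1)
FROM alltypesorc
WHERE cbigint % 237 = 0;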
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) diff --git ql/src/test/queries/clientpositive/vectorized_timestamp.q ql/src/test/queries/clientpositive/vectorized_timestamp.q index 2784b7a..87ed00d 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -6,23 +6,23 @@ CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); SET hive.vectorized.execution.enabled = false; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SET hive.vectorized.execution.enabled = true; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index aaf85fc..05467b9 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -23,7 +23,7 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -49,7 +49,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -75,7 +75,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -103,7 +103,7 @@ FROM alltypesorc_string ORDER BY c1; -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -129,7 +129,7 @@ SELECT FROM alltypesorc_wrong ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), @@ -144,7 +144,7 @@ SELECT FROM alltypesorc_string; -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... 
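To unpack the comment above: summing a TIMESTAMP column coerces the values to DOUBLE (seconds with a fractional part), so the reduce-side aggregate carries a double rather than a long, and that path is not vectorized (HIVE-8211). A hedged sketch that writes the implicit coercion out explicitly; alltypesorc_string is the table created earlier in this test file, and the rewritten second query is an illustration, not part of the patch:

SET hive.vectorized.execution.enabled = true;

-- As in the hunk below: SUM over the TIMESTAMP column directly.
EXPLAIN VECTORIZATION
SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string;

-- Equivalent, with the TIMESTAMP-to-DOUBLE coercion made explicit.
EXPLAIN VECTORIZATION
SELECT round(sum(cast(ctimestamp1 AS DOUBLE)), 3) FROM alltypesorc_string;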
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; @@ -152,7 +152,7 @@ SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q index 15964c9..bb0aedd 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q @@ -2,7 +2,7 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled = true; SET hive.int.timestamp.conversion.in.seconds=false; -explain +explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -40,7 +40,7 @@ where cbigint % 250 = 0; SET hive.int.timestamp.conversion.in.seconds=true; -explain +explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index d4a9747..0611d0a 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,9 +131,23 @@ STAGE PLANS: Select Operator expressions: dc (type: decimal(38,18)) outputColumnNames: dc + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[6:decimal(38,18)] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(dc), max(dc), sum(dc), avg(dc) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(IdentityExpression[6:decimal(38,18)]), VectorUDAFMaxDecimal(IdentityExpression[6:decimal(38,18)]), VectorUDAFSumDecimal(IdentityExpression[6:decimal(38,18)]), VectorUDAFAvgDecimal(IdentityExpression[6:decimal(38,18)]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(IdentityExpression[6:decimal(38,18)]) output type STRUCT requires PRIMITIVE IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic 
stats: COMPLETE Column stats: NONE @@ -139,8 +157,21 @@ STAGE PLANS: value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index 3d00ade..6785248 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -33,9 +33,9 @@ POSTHOOK: Output: default@testvec POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY -POSTHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 POSTHOOK: type: QUERY Plan optimized by CBO. diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 735e4f4..e096116 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -43,48 +43,113 @@ POSTHOOK: Output: default@tbl2 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 POSTHOOK: type: QUERY -Plan optimized by CBO. 
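For reading the annotated .q.out output above: PLAN VECTORIZATION reports whether vectorization is enabled for the query at all, Map Vectorization and Reduce Vectorization report per-vertex eligibility (including a notVectorizedReason when a vertex falls back to row mode), and Group By Vectorization lists the VectorUDAF* aggregators chosen. A hedged sketch of the two settings those conditions reference, reusing the vector_aggregate_9 query shown above:

-- Checked under PLAN VECTORIZATION (enabledConditionsMet) in the output above.
set hive.vectorized.execution.enabled=true;
-- Checked under Reduce Vectorization (enableConditionsMet) in the output above.
set hive.vectorized.execution.reduce.enabled=true;

explain vectorization detail
select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc;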
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -102,7 +167,7 @@ POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 22 PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select key, count(*) from @@ -113,7 +178,7 @@ select count(*) from ) subq2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select key, count(*) from @@ -123,46 +188,156 @@ select count(*) from group by key ) subq2 POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true 
+ allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_31] - Group By Operator [GBY_30] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_29] - Group By Operator [GBY_28] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_27] (rows=5 width=93) - Group By Operator [GBY_26] (rows=5 width=93) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=11 width=93) - Output:["_col0"],keys:_col0 - Merge Join 
Operator [MERGEJOIN_24] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_23] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_22] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -191,7 +366,7 @@ POSTHOOK: Input: default@tbl2 6 PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization detail select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -210,7 +385,7 @@ on src1.key = src2.key PREHOOK: type: QUERY POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization detail select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -227,71 +402,210 @@ join ) src2 on src1.key = src2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 llap - File Output Operator [FS_32] - Select Operator [SEL_31] (rows=5 width=102) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_49] (rows=5 width=102) - Conds:RS_51._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] - <-Reducer 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_51] - PartitionCols:_col0 - Group By Operator [GBY_50] (rows=5 width=93) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=11 width=93) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=11 width=93) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_42] (rows=10 width=93) - predicate:key is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_41] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Reducer 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_53] - PartitionCols:_col0 - Group By Operator [GBY_52] (rows=5 width=93) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] - PartitionCols:_col0 - Group By Operator [GBY_24] (rows=11 width=93) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_47] 
(rows=11 width=93) - Conds:SEL_16._col0=SEL_19._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_19] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_44] (rows=10 width=93) - predicate:key is not null - TableScan [TS_17] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_16] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_43] (rows=10 width=93) - predicate:key is not null - TableScan [TS_14] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: 
hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from ( @@ -339,7 +653,7 @@ POSTHOOK: Input: default@tbl2 9 1 1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -348,44 +662,109 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution 
mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 @@ -408,7 +787,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization detail select count(*) from ( select * from @@ -422,7 +801,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization detail select count(*) from ( select * from @@ -434,37 +813,102 @@ select count(*) from join tbl2 b on subq2.key = b.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
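Worth noting while reading these sort-merge-join plans: the Map vertices that contain the Merge Join Operator report plain "Execution mode: llap", i.e. the map side of the SMB join does not vectorize here, and only the final count reducer runs vectorized. Below is a hedged reconstruction of the table setup these plans presuppose; the exact DDL and settings live in the .q file, not in this hunk, so the table names come from the golden output and everything else is an assumption:

    -- reconstruction; assumes bucketed+sorted tables and SMB conversion enabled
    CREATE TABLE tbl1 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
    CREATE TABLE tbl2 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
    SET hive.auto.convert.sortmerge.join = true;
    SET hive.optimize.bucketmapjoin.sortedmerge = true;
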
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -497,7 +941,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select * from @@ -518,7 +962,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select * from @@ -537,37 +981,102 @@ select count(*) from ) subq4 on subq2.key = subq4.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: 
VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_3] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=1 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=1 width=93) - predicate:((key < 8) and (key < 6)) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from ( @@ -613,7 +1122,7 @@ POSTHOOK: Input: default@tbl1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization detail select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -623,44 +1132,109 @@ PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization detail select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
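The reducer-side blocks in these plans all gate on the same two conditions, hive.vectorized.execution.reduce.enabled and the execution engine being tez or spark, and report groupByVectorOutput / allNative / usesVectorUDFAdaptor alongside them. When a condition fails, the block prints a notVectorizedReason instead, as in the avg() STRUCT case near the top of this patch. A hedged illustration of the switch those lines test (the query is a stand-in, not taken from the patch):

    -- illustration only
    SET hive.vectorized.execution.reduce.enabled = false;
    EXPLAIN VECTORIZATION
    SELECT count(*) FROM tbl1;   -- expectation: Reduce Vectorization reports enabled: false

    SET hive.vectorized.execution.reduce.enabled = true;
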
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 8) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 8) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator 
+ native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 @@ -683,7 +1257,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side -- join should be performed -explain +explain vectorization detail select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -692,51 +1266,177 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side -- join should be performed -explain +explain vectorization detail select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](LongColAddLongScalar[2:long]) + vectorized: true + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: LongColAddLongScalar[2:long] + vectorized: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](LongColAddLongScalar[2:long]) + vectorized: true + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: LongColAddLongScalar[2:long] + vectorized: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS 
false + vectorized: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_29] - Group By Operator [GBY_28] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_21] (rows=11 width=93) - Conds:RS_24._col0=RS_27._col0(Inner) - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_22] (rows=10 width=93) - predicate:(key + 1) is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=10 width=93) - Output:["_col0"] - Filter Operator [FIL_25] (rows=10 width=93) - predicate:(key + 1) is not null - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 @@ -759,49 +1459,114 @@ POSTHOOK: Input: default@tbl2 22 PREHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY POSTHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY -Plan optimized by CBO. 
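A contrast worth keeping in mind from the (key + 1) plan above: once the join key is an expression, the sort-merge conversion is lost and the plan gains a shuffle, but that shuffle goes through the native VectorReduceSinkLongOperator, the filter and select vectorize as SelectColumnIsNotNull(LongColAddLongScalar), and the Map Vectorization block reports allNative: true. A hedged repro of that shape, with table names as in these tests and the settings assumed rather than quoted from the .q file:

    -- sketch; mirrors the modified-join-key case above
    SET hive.vectorized.execution.reducesink.new.enabled = true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT count(*)
    FROM (SELECT a.key + 1 AS key FROM tbl1 a) subq1
    JOIN (SELECT a.key + 1 AS key FROM tbl2 a) subq2
      ON subq1.key = subq2.key;
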
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_22] - Group By Operator [GBY_21] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=3 width=102) - Conds:SEL_2._col0=SEL_5._col0(Inner) - <-Select Operator [SEL_5] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_18] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_3] (rows=10 width=93) - default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=3 width=93) - Output:["_col0"] - Filter Operator [FIL_17] (rows=3 width=93) - predicate:(key < 6) - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"] +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator 
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
@@ -820,7 +1585,7 @@ POSTHOOK: Input: default@tbl2
 20
 PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
 -- It should be converted to a sort-merge join
-explain
+explain vectorization detail
 select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
     join
@@ -832,7 +1597,7 @@ select count(*) from
 PREHOOK: type: QUERY
 POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
 -- It should be converted to a sort-merge join
-explain
+explain vectorization detail
 select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
     join
@@ -842,43 +1607,115 @@ select count(*) from
   (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
   on (subq1.key = subq3.key)
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key < 6) (type: boolean)
+                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key < 6) (type: boolean)
+                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key < 6) (type: boolean)
+                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                             Inner Join 0 to 2
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                          2 _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order:
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: llap
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint])
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    nativeConditionsNotMet: Supported IS false
+                    vectorized: true
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Reducer 2 vectorized, llap
-      File Output Operator [FS_32]
-        Group By Operator [GBY_31] (rows=1 width=8)
-          Output:["_col0"],aggregations:["count(VALUE._col0)"]
-        <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_15]
-            Group By Operator [GBY_14] (rows=1 width=8)
-              Output:["_col0"],aggregations:["count()"]
-              Merge Join Operator [MERGEJOIN_28] (rows=6 width=102)
-                Conds:SEL_2._col0=SEL_5._col0(Inner),SEL_2._col0=SEL_8._col0(Inner)
-              <-Select Operator [SEL_5] (rows=3 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_26] (rows=3 width=93)
-                    predicate:(key < 6)
-                    TableScan [TS_3] (rows=10 width=93)
-                      default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
-              <-Select Operator [SEL_8] (rows=3 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_27] (rows=3 width=93)
-                    predicate:(key < 6)
-                    TableScan [TS_6] (rows=10 width=93)
-                      default@tbl2,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
-              <-Select Operator [SEL_2] (rows=3 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_25] (rows=3 width=93)
-                    predicate:(key < 6)
-                    TableScan [TS_0] (rows=10 width=93)
-                      default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select count(*) from
   (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
@@ -907,7 +1744,7 @@ POSTHOOK: Input: default@tbl2
 56
 PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
 -- The join should be converted to a sort-merge join
-explain
+explain vectorization detail
 select count(*) from (
   select subq2.key as key, subq2.value as value1, b.value as value2 from
   (
@@ -922,7 +1759,7 @@ select count(*) from (
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
 -- The join should be converted to a sort-merge join
-explain
+explain vectorization detail
 select count(*) from (
   select subq2.key as key, subq2.value as value1, b.value as value2 from
   (
@@ -935,37 +1772,102 @@ select count(*) from (
 join tbl2 b
 on subq2.key = b.key) a
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Reducer 2 vectorized, llap
-      File Output Operator [FS_22]
-        Group By Operator [GBY_21] (rows=1 width=8)
-          Output:["_col0"],aggregations:["count(VALUE._col0)"]
-        <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_11]
-            Group By Operator [GBY_10] (rows=1 width=8)
-              Output:["_col0"],aggregations:["count()"]
-              Merge Join Operator [MERGEJOIN_19] (rows=1 width=102)
-                Conds:SEL_2._col0=SEL_5._col0(Inner)
-              <-Select Operator [SEL_5] (rows=1 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_18] (rows=1 width=93)
-                    predicate:((key < 8) and (key < 6))
-                    TableScan [TS_3] (rows=10 width=93)
-                      default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-              <-Select Operator [SEL_2] (rows=1 width=93)
-                  Output:["_col0"]
-                  Filter Operator [FIL_17] (rows=1 width=93)
-                    predicate:((key < 8) and (key < 6))
-                    TableScan [TS_0] (rows=10 width=93)
-                      default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key < 8) and (key < 6)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key < 8) and (key < 6)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order:
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: llap
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint])
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    nativeConditionsNotMet: Supported IS false
+                    vectorized: true
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: select count(*) from (
   select subq2.key as key, subq2.value as value1, b.value as value2 from
@@ -1016,7 +1918,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest2
 PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
 -- a sort-merge join
-explain
+explain vectorization detail
 from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
@@ -1025,49 +1927,107 @@ insert overwrite table dest2 select key, val1, val2
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
 -- a sort-merge join
-explain
+explain vectorization detail
 from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
 insert overwrite table dest1 select key, val1
 insert overwrite table dest2 select key, val1, val2
 POSTHOOK: type: QUERY
-Plan not optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+  Stage-4 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-3
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Merge Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0, _col1, _col6
+                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string)
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest1
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                              name: default.dest2
+            Execution mode: llap
-Stage-4
-  Stats-Aggr Operator
-  Stage-0
-    Move Operator
-      table:{"name:":"default.dest1"}
-      Stage-3
-        Dependency Collection{}
-        Stage-2
-          Map 1 llap
-          File Output Operator [FS_9]
-            table:{"name:":"default.dest1"}
-            Select Operator [SEL_8] (rows=11 width=93)
-              Output:["_col0","_col1"]
-              Select Operator [SEL_7] (rows=11 width=93)
-                Output:["_col0","_col1","_col2"]
-                Merge Join Operator [MERGEJOIN_16] (rows=11 width=93)
-                  Conds:FIL_14.key=FIL_15.key(Inner),Output:["_col0","_col1","_col6"]
-                <-Filter Operator [FIL_15] (rows=10 width=93)
-                    predicate:key is not null
-                    TableScan [TS_1] (rows=10 width=93)
-                      default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                <-Filter Operator [FIL_14] (rows=10 width=93)
-                    predicate:key is not null
-                    TableScan [TS_0] (rows=10 width=93)
-                      default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-          File Output Operator [FS_11]
-            table:{"name:":"default.dest2"}
-            Please refer to the previous Select Operator [SEL_7]
-Stage-5
-  Stats-Aggr Operator
-  Stage-1
-    Move Operator
-      table:{"name:":"default.dest2"}
-      Please refer to the previous Stage-3
+  Stage: Stage-3
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest2
+
+  Stage: Stage-5
+    Stats-Aggr Operator
 
 PREHOOK: query: from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
@@ -1172,7 +2132,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest2
 PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
 -- It should be converted to a sort-merge join
-explain
+explain vectorization detail
 from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
@@ -1181,59 +2141,157 @@ insert overwrite table dest2 select key, count(*) group by key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
 -- It should be converted to a sort-merge join
-explain
+explain vectorization detail
 from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
 ) subq1
 insert overwrite table dest1 select key, val1
 insert overwrite table dest2 select key, count(*) group by key
 POSTHOOK: type: QUERY
-Plan not optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+  Stage-4 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-3
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+                    Merge Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest1
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col1 (type: bigint)
+            Execution mode: llap
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint])
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: IdentityExpression[0:int]
+                    native: false
+                    nativeConditionsNotMet: Supported IS false
+                    vectorized: true
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), UDFToInteger(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:bigint]
+                      vectorized: true
+                  Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                        nativeConditionsNotMet: Supported IS false
+                        vectorized: true
+                    Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest2
+
+  Stage: Stage-3
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-4
+    Stats-Aggr Operator
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest2
-Stage-4
-  Stats-Aggr Operator
-  Stage-0
-    Move Operator
-      table:{"name:":"default.dest1"}
-      Stage-3
-        Dependency Collection{}
-        Stage-2
-          Reducer 2 vectorized, llap
-          File Output Operator [FS_25]
-            table:{"name:":"default.dest2"}
-            Select Operator [SEL_24] (rows=5 width=93)
-              Output:["_col0","_col1"]
-              Group By Operator [GBY_23] (rows=5 width=93)
-                Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-              <-Map 1 [SIMPLE_EDGE] llap
-                File Output Operator [FS_9]
-                  table:{"name:":"default.dest1"}
-                  Merge Join Operator [MERGEJOIN_21] (rows=11 width=93)
-                    Conds:FIL_19.key=FIL_20.key(Inner),Output:["_col0","_col1"]
-                  <-Filter Operator [FIL_20] (rows=10 width=93)
-                      predicate:key is not null
-                      TableScan [TS_1] (rows=10 width=93)
-                        default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-                  <-Filter Operator [FIL_19] (rows=10 width=93)
-                      predicate:key is not null
-                      TableScan [TS_0] (rows=10 width=93)
-                        default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                SHUFFLE [RS_12]
-                  PartitionCols:_col0
-                  Group By Operator [GBY_11] (rows=11 width=93)
-                    Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                    Select Operator [SEL_10] (rows=11 width=93)
-                      Output:["_col0"]
-                      Please refer to the previous Merge Join Operator [MERGEJOIN_21]
-Stage-5
-  Stats-Aggr Operator
-  Stage-1
-    Move Operator
-      table:{"name:":"default.dest2"}
-      Please refer to the previous Stage-3
+  Stage: Stage-5
+    Stats-Aggr Operator
 
 PREHOOK: query: from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 6b59497..ff55c6e 100644
--- ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t
 POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
 tint_txt.rnum	tint_txt.cint
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -120,6 +124,14 @@ STAGE PLANS:
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
@@ -135,6 +147,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: int), _col1 (type: smallint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -180,13 +200,17 @@ tint.rnum	tsint.rnum	tint.cint	tsint.csint	between_col
 4	3	10	1	NoOk
 4	4	10	10	Ok
 Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -234,6 +258,14 @@ STAGE PLANS:
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Map 2
            Map Operator Tree:
                TableScan
@@ -249,6 +281,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: int), _col1 (type: smallint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index 0d4425b..1e9e34c 100644
--- ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc.
 POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -46,8 +50,23 @@ STAGE PLANS:
                         Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date)
@@ -67,10 +86,14 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -104,8 +127,23 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -126,10 +164,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -160,8 +202,23 @@ STAGE PLANS:
                         Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: decimal(20,10))
@@ -181,10 +238,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -218,8 +279,23 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -240,10 +316,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -274,8 +354,23 @@ STAGE PLANS:
                         Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date)
@@ -295,10 +390,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -329,8 +428,23 @@ STAGE PLANS:
                         Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date)
@@ -350,10 +464,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -384,8 +502,23 @@ STAGE PLANS:
                         Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: decimal(20,10))
@@ -405,10 +538,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -442,8 +579,23 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -711,12 +863,16 @@ POSTHOOK: Input: default@decimal_date_test
 6172
 PREHOOK: query: -- projections
-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
+EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- projections
-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
+EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -752,8 +908,23 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -775,10 +946,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -814,8 +989,23 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -837,10 +1027,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -876,8 +1070,23 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -899,10 +1108,14 @@ STAGE PLANS:
       Processor Tree:
        ListSink
 
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -938,8 +1151,23 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index a510e38..da36627 100644
--- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:
 POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -155,6 +159,12 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for SelectColumnIsNotNull argument #0 type name Binary
+                vectorized: false
        Map 3
            Map Operator Tree:
                TableScan
@@ -175,8 +185,21 @@ STAGE PLANS:
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2))
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for SelectColumnIsNotNull argument #0 type name Binary
+                vectorized: false
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -208,16 +231,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
#### A masked pattern was here ####
 -27832781952
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -253,8 +280,23 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -320,16 +362,20 @@ POSTHOOK: Input: default@hundredorc
 3	zync studies
 PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -348,11 +394,23 @@ STAGE PLANS:
                   alias: t1
                   Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        nativeConditionsMet: Supported IS true
+                        predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                        vectorized: true
                     predicate: i is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: i (type: int), bin (type: binary)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          nativeConditionsMet: Supported IS true
+                          selectExpressions: IdentityExpression[2:int], IdentityExpression[10:binary]
+                          vectorized: true
                       Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -360,6 +418,11 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinInnerLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                            vectorized: true
                         outputColumnNames: _col0, _col1, _col3
                         input vertices:
                           1 Map 2
@@ -367,9 +430,20 @@ STAGE PLANS:
                         Select Operator
                           expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
                           outputColumnNames: _col0, _col1, _col2
+                          Select Vectorization:
+                              className: VectorSelectOperator
+                              native: true
+                              nativeConditionsMet: Supported IS true
+                              selectExpressions: IdentityExpression[2:int], IdentityExpression[10:binary], IdentityExpression[11:binary]
+                              vectorized: true
                           Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                           File Output Operator
                             compressed: false
+                            File Sink Vectorization:
+                                className: VectorFileSinkOperator
+                                native: false
+                                nativeConditionsNotMet: Supported IS false
+                                vectorized: true
                             Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                             table:
                                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -377,26 +451,59 @@ STAGE PLANS:
                                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 2
            Map Operator Tree:
                TableScan
                  alias: t2
                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        nativeConditionsMet: Supported IS true
+                        predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                        vectorized: true
                    predicate: i is not null (type: boolean)
                    Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: i (type: int), bin (type: binary)
                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          nativeConditionsMet: Supported IS true
+                          selectExpressions: IdentityExpression[2:int], IdentityExpression[10:binary]
+                          vectorized: true
                      Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            vectorized: true
                        Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vector_bround.q.out ql/src/test/results/clientpositive/llap/vector_bround.q.out
index 05fac27..18f7dbe 100644
--- ql/src/test/results/clientpositive/llap/vector_bround.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bround.q.out
@@ -34,9 +34,9 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test_vector_bround
 POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround
+PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround
+POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
 POSTHOOK: type: QUERY
 
 Plan optimized by CBO.
diff --git ql/src/test/results/clientpositive/llap/vector_bucket.q.out ql/src/test/results/clientpositive/llap/vector_bucket.q.out
index 814ac75..e9a1393 100644
--- ql/src/test/results/clientpositive/llap/vector_bucket.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bucket.q.out
@@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@non_orc_table
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -42,15 +46,37 @@ STAGE PLANS:
                       value expressions: _col0 (type: string), _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    nativeConditionsMet: Supported IS true
+                    selectExpressions: VectorUDFAdaptor[2:Long], IdentityExpression[1:string]
+                    vectorized: true
                Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
                  Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index cd67e7e..1f41652 100644
--- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
   FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
   FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -148,8 +152,21 @@ STAGE PLANS:
                         value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
@@ -165,6 +182,13 @@ STAGE PLANS:
                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index b7b2ba5..f0190fb 100644
--- ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -47,18 +47,22 @@ val_10	10	1
 val_100	200	2
 val_103	206	2
 val_104	208	2
-PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+PREHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+POSTHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -96,8 +100,23 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN
[tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -113,6 +132,13 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) @@ -179,18 +205,22 @@ val_97 194 2 val_96 96 1 val_95 190 2 val_92 92 1 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -228,8 +258,23 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -245,6 +290,13 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_char_4.q.out ql/src/test/results/clientpositive/llap/vector_char_4.q.out index 6d55ab0..ac19b7f 100644 --- ql/src/test/results/clientpositive/llap/vector_char_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: 
QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -146,9 +150,20 @@ STAGE PLANS: Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToChar[13:Char], CastLongToChar[14:Char], CastLongToChar[15:Char], CastLongToChar[16:Char], VectorUDFAdaptor[17:char(20)], VectorUDFAdaptor[18:char(20)], CastStringGroupToChar[19:Char] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: name: default.char_lazy_binary_columnar Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index 1af8b3d..2846aa8 100644 --- ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -125,11 +125,15 @@ POSTHOOK: Output: default@char_join1_str_orc POSTHOOK: Lineage: char_join1_str_orc.c1 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -172,6 +176,14 @@ STAGE PLANS: value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized, llap LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -192,8 +204,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) @@ -231,11 +258,15 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -269,6 +300,14 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -298,8 +337,23 @@ STAGE PLANS: value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) @@ -339,11 +393,15 @@ POSTHOOK: Input: default@char_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with string -explain 
select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -386,6 +444,14 @@ STAGE PLANS: value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -406,8 +472,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_char_simple.q.out ql/src/test/results/clientpositive/llap/vector_char_simple.q.out index 3dea73d..babb526 100644 --- ql/src/test/results/clientpositive/llap/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from char_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from char_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -84,8 +88,23 @@ STAGE PLANS: value expressions: _col1 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) @@ -148,16 +167,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from char_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from char_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -187,8 +210,23 @@ STAGE PLANS: value expressions: _col1 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) @@ -254,12 +292,16 @@ create table char_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert into table char_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -282,33 +324,87 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized, llap 
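
For orientation, the q.out churn in this section comes from the two granularities of the new statement that these tests exercise: the bare form, which prints only the PLAN VECTORIZATION header plus per-task Map/Reduce Vectorization summaries, and the DETAIL form, which additionally annotates each operator (Select Vectorization, Reduce Sink Vectorization, File Sink Vectorization, ...) with its vector class and native conditions. A minimal pair, reusing tables from these tests (a reference sketch, not part of the patch):

    -- Summary-level output only:
    EXPLAIN VECTORIZATION
    SELECT key, value FROM char_2 ORDER BY key ASC LIMIT 5;

    -- Adds per-operator Vectorization blocks:
    EXPLAIN VECTORIZATION DETAIL
    INSERT INTO TABLE char_3 SELECT cint FROM alltypesorc LIMIT 10;
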
LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( _col0 AS CHAR(12) (type: char(12)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToChar[1:Char] + vectorized: true Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_coalesce.q.out ql/src/test/results/clientpositive/llap/vector_coalesce.q.out index c7897f7..0eee938 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c @@ -8,12 +8,16 @@ LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -45,8 +49,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) @@ -95,18 +114,22 @@ NULL NULL -738306196 -51.0 NULL -738306196 NULL NULL -819152895 8.0 NULL -819152895 NULL NULL -827212561 8.0 NULL -827212561 NULL NULL -949587513 11.0 NULL -949587513 -PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -138,8 +161,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) @@ -188,18 +226,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -228,8 +270,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 
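
The TopN Hash Memory Usage: 0.1 entry just above is produced by the ORDER BY ... LIMIT pattern, and it is the same condition that keeps a reduce sink off the specialized path: in the DETAIL hunk earlier (vector_char_simple), this pattern yields nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false and the generic VectorReduceSinkOperator rather than a specialized sink such as VectorReduceSinkLongOperator. A sketch that reproduces the condition, assuming the alltypesorc test table:

    SET hive.vectorized.execution.reducesink.new.enabled=true;
    -- ORDER BY plus LIMIT introduces a TopN hash in the Reduce Output
    -- Operator, so the reduce sink stays non-native (vectorized, but generic).
    EXPLAIN VECTORIZATION DETAIL
    SELECT cfloat, cbigint, COALESCE(cfloat, cbigint, 0) AS c
    FROM alltypesorc
    WHERE cfloat IS NULL AND cbigint IS NULL
    ORDER BY cfloat, cbigint, c
    LIMIT 10;
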
Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: null (type: float), null (type: bigint), 0.0 (type: float) @@ -278,18 +335,22 @@ NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -321,8 +382,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) @@ -371,18 +447,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -411,8 +491,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: null (type: float), null (type: bigint), null (type: float) @@ -461,16 +556,20 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index b390bfd..862b04e 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -16,18 +16,22 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@str_str_orc POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,14 +110,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,18 +170,22 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 -PREHOOK: query: EXPLAIN +PREHOOK: 
query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -194,9 +206,23 @@ STAGE PLANS: Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], VectorUDFAdaptor[4:Long](VectorCoalesce[3:string](IdentityExpression[0:string] ConstantVectorExpression[2:string])) + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[4:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -205,15 +231,43 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -221,9 +275,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], 
RoundWithNumDigitsDoubleToDouble[2:double](DoubleColDivideDoubleScalar[3:double](CastLongToDouble[2:double])) + vectorized: true Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -252,14 +317,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -277,9 +346,20 @@ STAGE PLANS: Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorCoalesce[3:string](IdentityExpression[0:string] ConstantVectorExpression[2:string]) + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -287,6 +367,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 08d49bc..ecf12f8 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -62,14 +62,18 @@ POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] orc_create_staging.str orc_create_staging.mp orc_create_staging.lst orc_create_staging.strct PREHOOK: query: -- Since complex types are not supported, this query should not vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM orc_create_complex PREHOOK: type: QUERY POSTHOOK: query: -- Since complex types are not supported, this query should not vectorize. 
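
The hunks that follow show the negative case explicitly: when a complex-typed column is actually referenced, the task-level summary is emitted with vectorized: false and a notVectorizedReason naming the offending operator and data type, in place of the usual allNative/usesVectorUDFAdaptor fields. A minimal reproduction against the table defined in this test (hedged sketch; the expected reason is quoted from the output below):

    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM orc_create_complex;
    -- expected in Map Vectorization:
    --   notVectorizedReason: Select expression for SELECT operator:
    --     Data type map of Column[mp] not supported
    --   vectorized: false
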
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM orc_create_complex POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -97,6 +101,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map of Column[mp] not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -113,18 +123,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### orc_create_complex.str orc_create_complex.mp orc_create_complex.lst orc_create_complex.strct -line1 {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] {"a":"one","b":"two"} -line2 {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"three","b":"four"} -line3 {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] {"a":"five","b":"six"} +line1 {"key13":"value13","key12":"value12","key11":"value11"} ["a","b","c"] {"a":"one","b":"two"} +line2 {"key21":"value21","key23":"value23","key22":"value22"} ["d","e","f"] {"a":"three","b":"four"} +line3 {"key33":"value33","key31":"value31","key32":"value32"} ["g","h","i"] {"a":"five","b":"six"} PREHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COUNT(*) FROM orc_create_complex PREHOOK: type: QUERY POSTHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize. 
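
Note that two unrelated things change in this file: the map-valued result rows are printed with a different key order (map iteration order is not guaranteed, so this is presentation churn, presumably hash-order dependent, not a data change), and the COUNT(*) plan below now carries full vectorization detail because its Select Operator projects no complex column. Sketch of the vectorizing case, reusing the same table:

    -- No complex column is referenced, so the scan vectorizes: the map side
    -- counts rows with VectorUDAFCountStar and the reduce side merges the
    -- partial counts with VectorUDAFCountMerge.
    EXPLAIN VECTORIZATION DETAIL
    SELECT COUNT(*) FROM orc_create_complex;
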
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COUNT(*) FROM orc_create_complex POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -143,28 +157,73 @@ STAGE PLANS: alias: orc_create_complex Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -188,14 +247,18 @@ POSTHOOK: Input: default@orc_create_complex c0 3 PREHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str FROM orc_create_complex ORDER BY str PREHOOK: type: QUERY POSTHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize. 
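
The ORDER BY variant below vectorizes for the same reason, and it also illustrates a reduce sink that is vectorized but not native: with a single sort key and no TopN in play, the only failed condition is the uniform-hash one, so the plan keeps the generic VectorReduceSinkOperator. Sketch:

    EXPLAIN VECTORIZATION DETAIL
    SELECT str FROM orc_create_complex ORDER BY str;
    -- expected in Reduce Sink Vectorization:
    --   nativeConditionsNotMet: Uniform Hash IS false
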
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str FROM orc_create_complex ORDER BY str POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -216,22 +279,60 @@ STAGE PLANS: Select Operator expressions: str (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 97d5642..d964af0 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -21,13 +21,17 @@ POSTHOOK: Output: default@test POSTHOOK: Lineage: test.a SIMPLE [] POSTHOOK: Lineage: test.b EXPRESSION [] c0 c1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from alltypesorc join test where alltypesorc.cint=test.a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from alltypesorc join test where alltypesorc.cint=test.a POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage 
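
In the join plans that follow, neither map task vectorizes, and the reasons differ by operator: one side fails on the MAPJOIN small-table expression (Data type map of Column[_col1] not supported), the other on its SELECT over the map column; the second join fails on an array type plus the index UDF in its filter (UDF GenericUDFIndex(Column[a], Const int 1) not supported). The two query shapes, reusing tables from this test file:

    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM alltypesorc JOIN test WHERE alltypesorc.cint = test.a;

    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM test2b JOIN test2a ON test2b.a = test2a.a[1];
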
Stage-0 depends on stages: Stage-1 @@ -71,6 +75,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported + vectorized: false Map 2 Map Operator Tree: TableScan @@ -91,6 +101,12 @@ STAGE PLANS: value expressions: _col1 (type: map) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map of Column[b] not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -146,13 +162,17 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test2b POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test2b join test2a on test2b.a = test2a.a[1] PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test2b join test2a on test2b.a = test2a.a[1] POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -196,6 +216,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type array of Column[a] not supported + vectorized: false Map 2 Map Operator Tree: TableScan @@ -212,6 +238,12 @@ STAGE PLANS: value expressions: a (type: array) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: UDF GenericUDFIndex(Column[a], Const int 1) not supported + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 3b9d9f9..2515434 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -47,10 +47,14 @@ POSTHOOK: Input: default@abcd 12 100 75 7 12 NULL 80 2 NULL 35 23 6 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -86,8 +90,21 @@ STAGE PLANS: value expressions: _col5 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) @@ -121,10 +138,14 @@ POSTHOOK: Input: default@abcd 100 1 1 3 12 1 2 9 NULL 1 1 6 -PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY -POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -159,8 +180,21 @@ STAGE PLANS: value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) @@ -190,10 +224,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -223,8 +261,21 @@ STAGE PLANS: value expressions: d (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) @@ -258,10 +309,14 @@ POSTHOOK: Input: default@abcd 100 1 1 3 12 1 2 9 NULL 1 1 6 -PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), 
count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY -POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -289,8 +344,21 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index 3d67664..633723c 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk 
SIMPLE [ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ] PREHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales PREHOOK: type: QUERY POSTHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1260,8 +1264,21 @@ STAGE PLANS: Select Operator expressions: ws_order_number (type: int) outputColumnNames: ws_order_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[16:int] + vectorized: true Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[16:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 @@ -1270,36 +1287,95 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 1760000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index a7a74c3..78be2ad 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -193,10 +197,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -17045922556 -PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: 
EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -226,8 +234,23 @@ STAGE PLANS: value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 64d5be7..e098229 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -47,7 +47,7 @@ POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] PREHOOK: query: -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -62,7 +62,7 @@ select from vector_date_1 order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -76,6 +76,10 @@ select dt2 > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -96,23 +100,61 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], LongColEqualLongColumn[2:long], LongColNotEqualLongColumn[3:long], LongColLessEqualLongColumn[4:long], LongColLessEqualLongColumn[5:long], LongColLessLongColumn[6:long], 
LongColGreaterEqualLongColumn[7:long], LongColGreaterEqualLongColumn[8:long], LongColGreaterLongColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean], IdentityExpression[9:boolean] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -158,7 +200,7 @@ POSTHOOK: Input: default@vector_date_1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 true true true true true true true true 2001-01-01 2001-06-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -172,7 +214,7 @@ select dt2 < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -186,6 +228,10 @@ select dt2 < 
dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -206,23 +252,61 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], LongColNotEqualLongColumn[2:long], LongColEqualLongColumn[3:long], LongColLessLongColumn[4:long], LongColGreaterEqualLongColumn[5:long], LongColGreaterLongColumn[6:long], LongColGreaterLongColumn[7:long], LongColLessEqualLongColumn[8:long], LongColLessLongColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean], 
IdentityExpression[9:boolean] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -269,7 +353,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 false false false false false false false false 2001-01-01 2001-06-01 false false false false false false false false PREHOOK: query: -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -284,7 +368,7 @@ select from vector_date_1 order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -298,6 +382,10 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -318,23 +406,61 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColNotEqualDateScalar[2:long], DateColGreaterEqualDateScalar[3:long], DateColGreaterDateScalar[4:long], DateColLessEqualDateScalar[5:long], DateColLessDateScalar[6:long], DateScalarNotEqualDateColumn[7:long], DateScalarLessEqualDateColumn[8:long], DateScalarLessDateColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:boolean], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -380,7 +506,7 @@ POSTHOOK: Input: default@vector_date_1 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 true true true true true true true true 2001-01-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -394,7 +520,7 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -408,6 +534,10 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -428,23 +558,61 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColEqualDateScalar[2:long], DateColLessEqualDateScalar[3:long], DateColLessDateScalar[4:long], DateColGreaterEqualDateScalar[5:long], DateColGreaterDateScalar[6:long], DateScalarEqualDateColumn[7:long], DateScalarGreaterEqualDateColumn[8:long], DateScalarGreaterDateColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:boolean], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -492,7 +660,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 false false false false false false false false PREHOOK: query: -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -507,7 +675,7 @@ order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -520,6 +688,10 @@ where and dt2 >= dt1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -538,28 +710,72 @@ STAGE PLANS: alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean] FilterLongColNotEqualLongColumn[-1:boolean] FilterLongColLessLongColumn[-1:boolean] FilterLongColLessEqualLongColumn[-1:boolean] FilterLongColGreaterLongColumn[-1:boolean] FilterLongColGreaterEqualLongColumn[-1:boolean]) + vectorized: true predicate: ((dt1 = dt1) and (dt1 <> dt2) and (dt1 < dt2) and (dt1 <= dt2) and (dt2 > dt1) and (dt2 >= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt1 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -604,7 +820,7 @@ POSTHOOK: Input: default@vector_date_1 2001-01-01 2001-06-01 PREHOOK: query: -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -621,7 +837,7 @@ order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -636,6 +852,10 @@ where and date '1970-01-01' <= dt1 order by dt1 POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -654,27 +874,71 @@ STAGE PLANS: alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDateColEqualDateScalar[-1:boolean] FilterDateScalarEqualDateColumn[-1:boolean] FilterDateColNotEqualDateScalar[-1:boolean] FilterDateScalarNotEqualDateColumn[-1:boolean] FilterDateColGreaterDateScalar[-1:boolean] FilterDateColGreaterEqualDateScalar[-1:boolean] FilterDateScalarLessDateColumn[-1:boolean] FilterDateScalarLessEqualDateColumn[-1:boolean]) + vectorized: true predicate: ((dt1 = 2001-01-01) and (2001-01-01 = dt1) and (dt1 <> 1970-01-01) and (1970-01-01 <> dt1) and (dt1 > 1970-01-01) and (dt1 >= 1970-01-01) and (1970-01-01 < dt1) and (1970-01-01 <= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt2 (type: date) outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 2001-01-01 (type: date), VALUE._col0 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[1:long], IdentityExpression[0:date] + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git 
ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index b3ff557..e4f77ea 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -32,12 +32,16 @@ POSTHOOK: Output: default@decimal_1 POSTHOOK: Lineage: decimal_1.t EXPRESSION [] POSTHOOK: Lineage: decimal_1.u EXPRESSION [] POSTHOOK: Lineage: decimal_1.v EXPRESSION [] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -58,22 +62,60 @@ STAGE PLANS: Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToBoolean[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -95,12 +137,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_1 order by t PREHOOK: type: QUERY 
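-- A minimal sketch (not part of this patch) of how the golden outputs above are
-- produced. The table names (abcd, decimal_1) and config keys are taken from the
-- diffed .q.out files themselves; the session settings shown are assumptions
-- matching the enabledConditionsMet / enableConditionsMet lines in the plans.
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;

-- Summary form: emits the PLAN VECTORIZATION header plus per-vertex
-- Map Vectorization / Reduce Vectorization blocks.
explain vectorization
select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;

-- Detail form: additionally emits per-operator blocks (Select Vectorization,
-- Filter Vectorization, Reduce Sink Vectorization, File Sink Vectorization)
-- listing vector expression class names such as CastDecimalToLong.
explain vectorization detail
select cast(t as tinyint) from decimal_1 order by t;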
-POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -121,22 +167,60 @@ STAGE PLANS: Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -158,12 +242,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -184,22 +272,60 @@ STAGE PLANS: Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: 
true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:smallint] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -221,12 +347,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as int) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as int) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -247,22 +377,60 @@ STAGE PLANS: Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 
Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -284,12 +452,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -310,22 +482,60 @@ STAGE PLANS: Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -347,12 +557,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as float) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as float) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -373,22 +587,60 @@ STAGE PLANS: Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[3:double] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:float] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 
336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -410,12 +662,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17.29 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as double) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as double) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -436,22 +692,60 @@ STAGE PLANS: Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[3:double] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -473,12 +767,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17.29 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as string) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as string) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -499,22 +797,60 @@ STAGE PLANS: Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToString[3:String] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -536,12 +872,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17.29 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as timestamp) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as timestamp) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -562,22 +902,60 @@ STAGE PLANS: Select Operator expressions: CAST( t AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToTimestamp[3:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out index 5576078..cf1b256 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out @@ -33,12 +33,16 @@ POSTHOOK: Input: default@decimal_txt POSTHOOK: Output: database:default POSTHOOK: Output: default@DECIMAL POSTHOOK: Lineage: decimal.dec SIMPLE [(decimal_txt)decimal_txt.FieldSchema(name:dec, type:decimal(10,0), comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dec FROM `DECIMAL` order by dec PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dec FROM `DECIMAL` order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -59,22 +63,60 @@ STAGE PLANS: Select Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or 
DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index ef1e561..33f0a44 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -21,12 +21,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@decimal_2 POSTHOOK: Lineage: decimal_2.t EXPRESSION [] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -47,22 +51,60 @@ STAGE PLANS: Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToBoolean[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -84,12 +126,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -110,22 +156,60 @@ STAGE PLANS: Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -147,12 +231,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -173,22 +261,60 @@ STAGE PLANS: Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:smallint] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num 
rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -210,12 +336,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as int) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as int) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -236,22 +366,60 @@ STAGE PLANS: Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -273,12 +441,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
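Each golden file in this patch follows the same pattern: the PREHOOK/POSTHOOK query text changes from plain "explain" to "explain vectorization detail", and a PLAN VECTORIZATION header is added that echoes the gating setting. As an illustrative sketch only (assuming the decimal_2 table loaded earlier in this test), the block just shown would be produced by a q-file fragment like:

  -- Sketch for illustration; decimal_2 is the test table created earlier in this file.
  -- hive.vectorized.execution.enabled is the condition echoed under enabledConditionsMet.
  set hive.vectorized.execution.enabled=true;
  explain vectorization detail
  select cast(t as bigint) from decimal_2 order by t;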
STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -299,22 +471,60 @@ STAGE PLANS: Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -336,12 +546,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as float) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as float) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -362,22 +576,60 @@ STAGE PLANS: Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[1:double] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:float] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -399,12 +651,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 17.29 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as double) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as double) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -425,22 +681,60 @@ STAGE PLANS: Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[1:double] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -462,12 +756,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 17.29 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as string) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as string) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -488,22 +786,60 @@ STAGE PLANS: Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToString[1:String] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -536,12 +872,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@decimal_2 POSTHOOK: Lineage: decimal_2.t EXPRESSION [] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -562,22 +902,60 @@ STAGE PLANS: Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToBoolean[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -599,12 +977,16 @@ 
POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -625,22 +1007,60 @@ STAGE PLANS: Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -662,12 +1082,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 13 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -688,22 +1112,60 @@ STAGE PLANS: Select Operator 
expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:smallint] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -725,12 +1187,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### -3827 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as int) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as int) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -751,22 +1217,60 @@ STAGE PLANS: Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE 
or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -788,12 +1292,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3404045 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -814,22 +1322,60 @@ STAGE PLANS: Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[1:long] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true 
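Map-side and reduce-side vectorization are reported separately: the Map Vectorization block just above cites hive.vectorized.use.vectorized.input.format, the Reducer 2 block that follows cites hive.vectorized.execution.reduce.enabled under enableConditionsMet, and the ReduceSink remains native: false because Uniform Hash is listed under nativeConditionsNotMet. A hypothetical q-file experiment, not run for this patch, that would isolate the reduce-side flag:

  -- Hypothetical sketch: with the reduce-side flag off, the Reducer 2 block should no
  -- longer report vectorized execution, while the map side is unaffected (not verified here).
  set hive.vectorized.execution.reduce.enabled=false;
  explain vectorization detail
  select cast(t as bigint) from decimal_2 order by t;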
Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -851,12 +1397,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3404045 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as float) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as float) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -877,22 +1427,60 @@ STAGE PLANS: Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[1:double] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:float] + vectorized: true Statistics: Num rows: 1 Data size: 
112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -914,12 +1502,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3404045.5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as double) from decimal_2 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as double) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -940,22 +1532,60 @@ STAGE PLANS: Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[1:double] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -977,12 +1607,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3404045.5044003 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as string) from decimal_2 
order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as string) from decimal_2 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1003,22 +1637,60 @@ STAGE PLANS: Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToString[1:String] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1040,12 +1712,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3404045.5044003 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(3.14 as decimal(4,2)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(3.14 as decimal(4,2)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1064,21 +1740,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: 
true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 3.14 (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(4,2)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1100,12 +1813,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3.14 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(cast(3.14 as float) as decimal(4,2)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(cast(3.14 as float) as decimal(4,2)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1124,21 +1841,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: 
Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 3.14 (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(4,2)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1160,12 +1914,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3.14 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(cast('2012-12-19 11:12:19.1234567' as timestamp) as decimal(30,8)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(cast('2012-12-19 11:12:19.1234567' as timestamp) as decimal(30,8)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1184,21 +1942,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 1355944339.1234567 (type: decimal(30,8)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(30,8)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1220,12 +2015,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 1355944339.12345670 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(true as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(true as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1244,21 +2043,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 1 (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output 
Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1271,12 +2107,16 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(true as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(true as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1295,21 +2135,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 1 (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1331,12 +2208,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(3Y as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(3Y as decimal) as c from 
decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1355,21 +2236,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1391,12 +2309,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(3S as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(3S as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1415,21 +2337,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink 
Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1451,12 +2410,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(cast(3 as int) as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(cast(3 as int) as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1475,21 +2438,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1511,12 +2511,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(3L as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(3L as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1535,21 +2539,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1571,12 +2612,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(0.99999999999999999999 as decimal(20,19)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(0.99999999999999999999 as decimal(20,19)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1595,21 +2640,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 1 (type: decimal(20,19)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(20,19)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1631,12 +2713,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 1.0000000000000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast('0.99999999999999999999' as decimal(20,20)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast('0.99999999999999999999' as decimal(20,20)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1655,21 +2741,58 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 0.99999999999999999999 (type: decimal(20,20)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:decimal(20,20)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 6faf453..e68e9ab 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem PREHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple 
aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby @@ -33,13 +33,17 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -75,8 +79,23 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) @@ -132,7 +151,7 @@ POSTHOOK: Input: default@decimal_vgby 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 PREHOOK: query: -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby @@ -140,13 +159,17 @@ EXPLAIN SELECT cint, HAVING COUNT(*) > 1 PREHOOK: type: QUERY POSTHOOK: query: -- Now add the others... 
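-- NOTE: unlike the primitive-only aggregates above, AVG and STDDEV_POP/STDDEV_SAMP ship
-- their partial state to the reducer as struct-typed values, so in the plan below the map
-- side stays vectorized (with groupByVectorOutput: false) while Reducer 2 reports
-- vectorized: false and a notVectorizedReason naming the struct-typed avg parameter.
-- A minimal sketch isolating that fallback (hypothetical session; flags quoted from the
-- conditions printed in this output):
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
EXPLAIN VECTORIZATION SELECT cint, AVG(cdecimal1) FROM decimal_vgby GROUP BY cint;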
-EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -182,8 +205,21 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out index 16d9929..6cd1da2 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS 
DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out index d37b973..a9f9342 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out @@ -15,14 +15,18 @@ POSTHOOK: Output: default@decimal_test POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 
AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -54,8 +58,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,23)), KEY.reducesinkkey3 (type: decimal(38,28)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index 361c46b..f127feb 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -72,12 +72,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -96,11 +100,23 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[1:long]) + vectorized: true predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[0:decimal(4,2)] + vectorized: true Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -108,12 +124,24 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + vectorized: true outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -121,25 +149,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[1:long]) + vectorized: true predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(4,0)] + vectorized: true Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(6,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(6,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git 
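-- NOTE: in the vector_decimal_mapjoin plan above, the join runs as the generic
-- VectorMapJoinOperator because the native vectorized map join does not support DECIMAL
-- join keys (nativeNotSupportedKeyTypes: DECIMAL), whereas the small-table shuffle is
-- fully native as VectorReduceSinkMultiKeyOperator (Uniform Hash IS true). A sketch
-- (hypothetical session; the flag is quoted from the nativeConditionsMet list above):
SET hive.vectorized.execution.mapjoin.native.enabled=true;
explain vectorization detail
select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec);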
ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out index 6d5b578..02ab85b 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out @@ -14,7 +14,7 @@ POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdecimal1 ,Round(cdecimal1, 2) @@ -53,7 +53,7 @@ and sin(cdecimal1) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdecimal1 ,Round(cdecimal1, 2) @@ -90,6 +90,10 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cdecimal1) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 5583874..6226739 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -545,10 +545,14 @@ NULL NULL 123456789.0123456789 15241578753238836.75019051998750190521 1234567890.1234560000 NULL 1234567890.1234567890 NULL -PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -581,8 +585,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 4063199..03f83b9 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -30,12 +30,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: 
default@decimal_tbl_txt #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -64,15 +68,37 @@ STAGE PLANS: value expressions: _col1 (type: decimal(11,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)], IdentityExpression[1:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -94,12 +120,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,15 +158,37 @@ STAGE PLANS: value expressions: _col0 (type: decimal(10,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:decimal(10,0)], 
IdentityExpression[0:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -186,12 +238,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -220,15 +276,37 @@ STAGE PLANS: value expressions: _col1 (type: decimal(11,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)], IdentityExpression[1:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -250,12 +328,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -284,15 +366,37 @@ STAGE PLANS: value expressions: _col0 (type: decimal(10,0)) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
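-- NOTE: across the vector_decimal_round plans, the text and RCFile tables leave the map
-- side unvectorized (enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize
-- IS false and hive.vectorized.use.row.serde.deserialize IS false respectively), so only
-- the reducer vectorizes; the ORC table vectorizes end to end through its vectorized
-- input format. A sketch of opting the text table in (assumption: this flag is the only
-- gate in this configuration):
SET hive.vectorized.use.vector.serde.deserialize=true;
explain vectorization detail
select dec, round(dec, -1) from decimal_tbl_txt order by dec;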
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:decimal(10,0)], IdentityExpression[0:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -342,12 +446,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -368,23 +476,61 @@ STAGE PLANS: Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)], FuncRoundWithNumDigitsDecimalToDecimal[1:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + 
Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)], IdentityExpression[1:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -406,12 +552,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -432,23 +582,61 @@ STAGE PLANS: Select Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:decimal(10,0)], IdentityExpression[0:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: 
false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index 882e83d..71a63a4 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), @@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,23 +82,61 @@ STAGE PLANS: Select Operator expressions: round(dec) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundDecimalToDecimal[1:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[2:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[4:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[5:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[6:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[7:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[8:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[9:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[10:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[11:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[12:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[13:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(21,0)], IdentityExpression[1:decimal(21,0)], IdentityExpression[2:decimal(22,1)], IdentityExpression[3:decimal(23,2)], IdentityExpression[4:decimal(24,3)], IdentityExpression[5:decimal(21,0)], IdentityExpression[6:decimal(21,0)], IdentityExpression[7:decimal(21,0)], IdentityExpression[8:decimal(21,0)], IdentityExpression[9:decimal(21,0)], IdentityExpression[10:decimal(21,0)], IdentityExpression[11:decimal(21,0)], IdentityExpression[12:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -153,7 +195,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### 125.315000000000000000 -125.315000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -163,7 +205,7 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -173,6 +215,10 @@ SELECT round(neg, 
-1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -193,23 +239,61 @@ STAGE PLANS: Select Operator expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundDecimalToDecimal[2:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[4:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[5:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[6:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[7:decimal(25,4)], FuncRoundWithNumDigitsDecimalToDecimal[8:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[9:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[10:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[11:decimal(21,0)], FuncRoundDecimalToDecimal[12:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[13:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[14:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[15:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[16:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[17:decimal(25,4)], FuncRoundWithNumDigitsDecimalToDecimal[18:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[19:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[20:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[21:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), 
_col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(21,0)], IdentityExpression[1:decimal(21,0)], IdentityExpression[2:decimal(22,1)], IdentityExpression[3:decimal(23,2)], IdentityExpression[4:decimal(24,3)], IdentityExpression[5:decimal(25,4)], IdentityExpression[6:decimal(21,0)], IdentityExpression[7:decimal(21,0)], IdentityExpression[8:decimal(21,0)], IdentityExpression[9:decimal(21,0)], IdentityExpression[10:decimal(21,0)], IdentityExpression[11:decimal(21,0)], IdentityExpression[12:decimal(22,1)], IdentityExpression[13:decimal(23,2)], IdentityExpression[14:decimal(24,3)], IdentityExpression[15:decimal(25,4)], IdentityExpression[16:decimal(21,0)], IdentityExpression[17:decimal(21,0)], IdentityExpression[18:decimal(21,0)], IdentityExpression[19:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -273,7 +357,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### 3.141592653589793000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -294,7 +378,7 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d PREHOOK: type: 
QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -315,6 +399,10 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -335,23 +423,61 @@ STAGE PLANS: Select Operator expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15) (type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col31, _col32, _col33, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal[1:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[2:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[4:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[5:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[6:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[7:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[8:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[9:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[10:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[11:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[12:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[13:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[14:decimal(25,4)], FuncRoundWithNumDigitsDecimalToDecimal[15:decimal(26,5)], FuncRoundWithNumDigitsDecimalToDecimal[16:decimal(27,6)], FuncRoundWithNumDigitsDecimalToDecimal[17:decimal(28,7)], FuncRoundWithNumDigitsDecimalToDecimal[18:decimal(29,8)], FuncRoundWithNumDigitsDecimalToDecimal[19:decimal(30,9)], FuncRoundWithNumDigitsDecimalToDecimal[20:decimal(31,10)], FuncRoundWithNumDigitsDecimalToDecimal[21:decimal(32,11)], FuncRoundWithNumDigitsDecimalToDecimal[22:decimal(33,12)], FuncRoundWithNumDigitsDecimalToDecimal[23:decimal(34,13)], 
FuncRoundWithNumDigitsDecimalToDecimal[24:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[25:decimal(35,14)], FuncRoundWithNumDigitsDecimalToDecimal[26:decimal(36,15)], FuncRoundWithNumDigitsDecimalToDecimal[27:decimal(37,16)], FuncRoundWithNumDigitsDecimalToDecimal[28:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[29:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[30:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[31:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[32:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[33:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: 
decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)), VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(21,0)], IdentityExpression[1:decimal(21,0)], IdentityExpression[2:decimal(21,0)], IdentityExpression[3:decimal(21,0)], IdentityExpression[4:decimal(21,0)], IdentityExpression[5:decimal(21,0)], IdentityExpression[6:decimal(21,0)], IdentityExpression[7:decimal(21,0)], IdentityExpression[8:decimal(21,0)], IdentityExpression[9:decimal(21,0)], IdentityExpression[10:decimal(21,0)], IdentityExpression[11:decimal(21,0)], IdentityExpression[12:decimal(21,0)], IdentityExpression[13:decimal(21,0)], IdentityExpression[14:decimal(21,0)], IdentityExpression[15:decimal(21,0)], IdentityExpression[16:decimal(21,0)], IdentityExpression[17:decimal(22,1)], IdentityExpression[18:decimal(23,2)], IdentityExpression[19:decimal(24,3)], IdentityExpression[20:decimal(25,4)], IdentityExpression[21:decimal(26,5)], IdentityExpression[22:decimal(27,6)], IdentityExpression[23:decimal(28,7)], IdentityExpression[24:decimal(29,8)], IdentityExpression[25:decimal(30,9)], IdentityExpression[26:decimal(31,10)], IdentityExpression[27:decimal(32,11)], IdentityExpression[28:decimal(33,12)], IdentityExpression[29:decimal(34,13)], IdentityExpression[29:decimal(34,13)], IdentityExpression[30:decimal(35,14)], IdentityExpression[31:decimal(36,15)], IdentityExpression[32:decimal(37,16)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -438,14 +564,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_4_orc #### A masked pattern was here #### 1809242.315111134400000000 -1809242.315111134400000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a 
root stage Stage-0 depends on stages: Stage-1 @@ -466,23 +596,61 @@ STAGE PLANS: Select Operator expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal[2:decimal(30,9)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(30,9)] + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(30,9)], IdentityExpression[1:decimal(30,9)], ConstantVectorExpression[2:decimal(17,9)], ConstantVectorExpression[3:decimal(17,9)] + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 40dc99a..8753c74 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@decimal_udf POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF +EXPLAIN 
VECTORIZATION SELECT key + key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key + key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,6 +85,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -134,10 +146,14 @@ NULL 2.0000000000 -2469135780.2469135780 2469135780.2469135600 -PREHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key + value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key + value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -165,6 +181,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -218,10 +242,14 @@ NULL 2.0000000000 -2469135780.1234567890 2469135780.1234567800 -PREHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key + (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key + (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -249,6 +277,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -302,10 +338,14 @@ NULL 1.5 -1.8518518351234567E9 1.8518518351234567E9 -PREHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key + '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key + '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -333,6 +373,14 @@ 
STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -387,11 +435,15 @@ NULL -1.2345678891234567E9 1.2345678911234567E9 PREHOOK: query: -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key - key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key - key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -419,6 +471,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -472,10 +532,14 @@ NULL 0.0000000000 0.0000000000 0.0000000000 -PREHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key - value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key - value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -503,6 +567,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -556,10 +628,14 @@ NULL 0.0000000000 -0.1234567890 0.1234567800 -PREHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key - (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key - (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -587,6 +663,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -640,10 +724,14 @@ NULL 0.5 
 -6.172839451234567E8
 6.172839451234567E8
-PREHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key - '1.0' FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key - '1.0' FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -671,6 +759,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -725,11 +821,15 @@ NULL
 -1.2345678911234567E9
 1.2345678891234567E9
 PREHOOK: query: -- multiplication
-EXPLAIN SELECT key * key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT key * key FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiplication
-EXPLAIN SELECT key * key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT key * key FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -757,6 +857,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -810,10 +918,14 @@ NULL
 1.00000000000000000000
 NULL
 NULL
-PREHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key, value FROM DECIMAL_UDF where key * value > 0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key, value FROM DECIMAL_UDF where key * value > 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -844,6 +956,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -882,10 +1002,14 @@ POSTHOOK: Input: default@decimal_udf
 1.0000000000 1
 -1234567890.1234567890 -1234567890
 1234567890.1234567800 1234567890
-PREHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key * value FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key * value FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -913,6 +1037,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -966,10 +1098,14 @@ NULL
 1.0000000000
 1524157875171467887.5019052100
 1524157875171467876.3907942000
-PREHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key * (value/2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key * (value/2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -997,6 +1133,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1050,10 +1194,14 @@ NULL
 0.5
 7.6207893758573389E17
 7.6207893758573389E17
-PREHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key * '2.0' FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key * '2.0' FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1081,6 +1229,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1135,11 +1291,15 @@ NULL
 -2.4691357802469134E9
 2.4691357802469134E9
 PREHOOK: query: -- division
-EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1
+EXPLAIN VECTORIZATION SELECT key / 0 FROM DECIMAL_UDF limit 1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- division
-EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1
+EXPLAIN VECTORIZATION SELECT key / 0 FROM DECIMAL_UDF limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1170,6 +1330,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1186,10 +1354,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
 #### A masked pattern was here ####
 NULL
-PREHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / NULL FROM DECIMAL_UDF limit 1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / NULL FROM DECIMAL_UDF limit 1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1220,6 +1392,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: org.apache.hadoop.hive.ql.metadata.HiveException: Could not instantiate DoubleColDivideDoubleScalar with arguments arguments: [0, ConstantVectorExpression[1:double], 2], argument classes: [Integer, ConstantVectorExpression, Integer], exception: java.lang.IllegalArgumentException
+#### A masked pattern was here ####
+
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -1236,10 +1416,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
 #### A masked pattern was here ####
 NULL
-PREHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1270,6 +1454,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1319,10 +1511,14 @@ POSTHOOK: Input: default@decimal_udf
 1.000000000000000000000000
 1.000000000000000000000000
 1.000000000000000000000000
-PREHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1353,6 +1549,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1392,10 +1596,14 @@ POSTHOOK: Input: default@decimal_udf
 1.000000000000000000000
 1.000000000100000000000
 1.000000000099999992710
-PREHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1426,6 +1634,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1465,10 +1681,14 @@ POSTHOOK: Input: default@decimal_udf
 2.0
 2.0000000002
 2.0000000002
-PREHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1496,6 +1716,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1550,11 +1778,15 @@ NULL
 -6.172839440617284E8
 6.172839460617284E8
 PREHOOK: query: -- abs
-EXPLAIN SELECT abs(key) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT abs(key) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- abs
-EXPLAIN SELECT abs(key) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT abs(key) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1582,6 +1814,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1636,11 +1876,15 @@ NULL
 1234567890.1234567890
 1234567890.1234567800
 PREHOOK: query: -- avg
-EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value
+EXPLAIN VECTORIZATION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value
 PREHOOK: type: QUERY
 POSTHOOK: query: -- avg
-EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value
+EXPLAIN VECTORIZATION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1677,8 +1921,21 @@ STAGE PLANS:
                       value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2)
@@ -1697,6 +1954,13 @@ STAGE PLANS:
                  value expressions: _col1 (type: decimal(38,23)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10))
        Reducer 3
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,23)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10))
@@ -1742,11 +2006,15 @@ POSTHOOK: Input: default@decimal_udf
 4400 -4400.00000000000000000000000 -4400.00000000000000 -4400.0000000000
 1234567890 1234567890.12345678000000000000000 1234567890.12345678000000 1234567890.1234567800
 PREHOOK: query: -- negative
-EXPLAIN SELECT -key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT -key FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- negative
-EXPLAIN SELECT -key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT -key FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1774,6 +2042,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -1828,11 +2104,15 @@ NULL
 1234567890.1234567890
 -1234567890.1234567800
 PREHOOK: query: -- positive
-EXPLAIN SELECT +key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT +key FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- positive
-EXPLAIN SELECT +key FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT +key FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -1981,11 +2261,15 @@ NULL
 -1234567890
 1234567891
 PREHOOK: query: -- floor
-EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT FLOOR(key) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- floor
-EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT FLOOR(key) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2013,6 +2297,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -2067,11 +2359,15 @@ NULL
 -1234567891
 1234567890
 PREHOOK: query: -- round
-EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT ROUND(key, 2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- round
-EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT ROUND(key, 2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2099,6 +2395,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -2153,11 +2457,15 @@ NULL
 -1234567890.12
 1234567890.12
 PREHOOK: query: -- power
-EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- power
-EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2185,6 +2493,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -2239,11 +2555,15 @@ NULL
 1.52415787532388352E18
 1.52415787532388352E18
 PREHOOK: query: -- modulo
-EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF
 PREHOOK: type: QUERY
 POSTHOOK: query: -- modulo
-EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF
+EXPLAIN VECTORIZATION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2271,6 +2591,14 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -2325,11 +2653,15 @@ NULL
 -617283944.061728394500000000
 1.000000000000000000
 PREHOOK: query: -- stddev, var
-EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value
+EXPLAIN VECTORIZATION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value
 PREHOOK: type: QUERY
 POSTHOOK: query: -- stddev, var
-EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value
+EXPLAIN VECTORIZATION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2365,8 +2697,21 @@ STAGE PLANS:
                       value expressions: _col1 (type: struct), _col2 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF stddev parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev(VALUE._col0), variance(VALUE._col1)
@@ -2414,11 +2759,15 @@ POSTHOOK: Input: default@decimal_udf
 4400 0.0 0.0
 1234567890 0.0 0.0
 PREHOOK: query: -- stddev_samp, var_samp
-EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value
+EXPLAIN VECTORIZATION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY
value +EXPLAIN VECTORIZATION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2454,8 +2803,21 @@ STAGE PLANS: value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) @@ -2503,11 +2865,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2540,8 +2906,19 @@ STAGE PLANS: value expressions: _col0 (type: array) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF histogram_numeric not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF histogram_numeric not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: histogram_numeric(VALUE._col0) @@ -2572,11 +2949,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### [{"x":-1.2345678901234567E9,"y":1.0},{"x":-144.50057142857142,"y":35.0},{"x":1.2345678901234567E9,"y":1.0}] PREHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MIN(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MIN(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2609,8 +2990,23 @@ STAGE PLANS: value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true 
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -2641,11 +3037,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -1234567890.1234567890 PREHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MAX(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MAX(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2678,8 +3078,23 @@ STAGE PLANS: value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -2710,11 +3125,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 1234567890.1234567800 PREHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT COUNT(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT COUNT(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2747,8 +3166,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out index 5ea9f4d..a8e2b6d 100644 --- 
ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out @@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,14 +75,31 @@ STAGE PLANS: alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterDecimalColEqualDecimalScalar[-1:boolean] + vectorized: true predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[2:double], ConstantVectorExpression[3:double], ConstantVectorExpression[4:double], ConstantVectorExpression[5:double], ConstantVectorExpression[6:double], ConstantVectorExpression[7:double], ConstantVectorExpression[8:double] + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -86,6 +107,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -104,20 +133,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), 
log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -133,14 +166,31 @@ STAGE PLANS: alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterDecimalColEqualDecimalScalar[-1:boolean] + vectorized: true predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[2:double], ConstantVectorExpression[3:double], ConstantVectorExpression[4:double], ConstantVectorExpression[5:double], FuncLogWithBaseLongToDouble[6:double], VectorUDFAdaptor[7:Double], ConstantVectorExpression[8:double], ConstantVectorExpression[9:double] + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -148,6 +198,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 188c624..2140d42 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain 
+POSTHOOK: query: explain vectorization detail select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,8 +135,21 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -141,13 +158,40 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -155,9 +199,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_elt.q.out ql/src/test/results/clientpositive/llap/vector_elt.q.out index bb66867..a78a60f 100644 --- ql/src/test/results/clientpositive/llap/vector_elt.q.out +++ ql/src/test/results/clientpositive/llap/vector_elt.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -47,7 +51,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -60,7 +64,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -73,6 +77,10 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index 1e24e81..a1d7772 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -53,18 +57,52 @@ STAGE PLANS: Select Operator expressions: substr(key, 1, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringSubstrColStartLen[2:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() 
(type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -73,17 +111,41 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 758f70c..bf0ab76 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED 
AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -53,18 +57,52 @@ STAGE PLANS: Select Operator expressions: substr(value, 5, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringSubstrColStartLen[2:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 @@ -73,17 +111,41 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index fb5dfe6..ec47aa9 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,9 +135,23 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -142,15 +160,43 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[2:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -158,9 +204,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:tinyint], IdentityExpression[2:bigint] + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index ff658d7..415fa9d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,6 +1,6 @@ Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. -explain +explain vectorization detail select * from src where not key in @@ -8,7 +8,7 @@ where not key in order by key PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. 
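Throughout these q.out updates the pattern is the same: plain EXPLAIN becomes EXPLAIN VECTORIZATION, which prepends the PLAN VECTORIZATION header and the per-vertex Map Vectorization / Reduce Vectorization summaries, while EXPLAIN VECTORIZATION DETAIL additionally prints per-operator blocks such as Select Vectorization, Filter Vectorization and Reduce Sink Vectorization. A minimal sketch of the two statement forms these tests exercise, with a placeholder table name (some_orc_table is not a table from this patch):
  explain vectorization select count(*) from some_orc_table;          -- summary: plan + per-vertex blocks
  explain vectorization detail select count(*) from some_orc_table;   -- adds per-operator blocks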
-explain +explain vectorization detail select * from src where not key in diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index c4bcbab..31c88ec 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -211,7 +211,7 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ss_ticket_number from @@ -219,7 +219,7 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ss_ticket_number from @@ -227,6 +227,10 @@ from group by ss_ticket_number limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -247,8 +251,21 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[9:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -257,23 +274,61 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By 
Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -324,7 +379,7 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select min(ss_ticket_number) m from @@ -336,7 +391,7 @@ from group by ss_ticket_number order by m PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select min(ss_ticket_number) m from @@ -348,6 +403,10 @@ from group by ss_ticket_number order by m POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -369,8 +428,21 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[9:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -379,19 +451,54 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true 
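The enabled/enableConditionsMet pairs printed above map one-to-one onto configuration switches. A minimal sketch of the settings implied by the condition names shown in these plans (inferred from the plan output itself, not copied from a q file in this patch):
  set hive.vectorized.execution.enabled=true;                 -- PLAN VECTORIZATION: enabled
  set hive.vectorized.execution.reduce.enabled=true;          -- Reduce Vectorization
  set hive.vectorized.execution.reducesink.new.enabled=true;  -- native reduce-sink operators
  set hive.execution.engine=tez;                              -- satisfies "tez IN [tez, spark]"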
Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 @@ -399,20 +506,50 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int] + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -533,7 +670,7 @@ POSTHOOK: Input: default@store_sales 80 81 82 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -545,7 +682,7 @@ from group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -557,6 +694,10 @@ from group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -578,9 +719,23 @@ STAGE PLANS: Select Operator expressions: 
ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[9:int], IdentityExpression[2:int], IdentityExpression[10:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[10:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int], IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -589,15 +744,44 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -605,9 +789,23 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int], IdentityExpression[0:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[1:int]), VectorUDAFSumLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: 
Supported IS false + vectorized: true keys: _col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -615,17 +813,41 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:bigint], IdentityExpression[2:bigint] + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -746,7 +968,7 @@ POSTHOOK: Input: default@store_sales 80 151471 704 81 105109 429 82 55611 254 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ss_ticket_number, ss_item_sk, sum(q) from @@ -758,7 +980,7 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ss_ticket_number, ss_item_sk, sum(q) from @@ -770,6 +992,10 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -791,9 +1017,23 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[9:int], IdentityExpression[2:int], IdentityExpression[10:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[10:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int], IdentityExpression[2:int] + native: false + 
nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -802,15 +1042,43 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -818,9 +1086,23 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int], IdentityExpression[0:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -828,17 +1110,41 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:bigint] + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 8e55ce3..e239708 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -127,16 +127,20 @@ POSTHOOK: Lineage: store.s_street_type SIMPLE [(store_txt)store_txt.FieldSchema( POSTHOOK: Lineage: store.s_suite_number SIMPLE [(store_txt)store_txt.FieldSchema(name:s_suite_number, type:string, comment:null), ] POSTHOOK: Lineage: store.s_tax_precentage SIMPLE [(store_txt)store_txt.FieldSchema(name:s_tax_precentage, type:decimal(5,2), comment:null), ] POSTHOOK: Lineage: store.s_zip SIMPLE [(store_txt)store_txt.FieldSchema(name:s_zip, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s_store_id from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s_store_id from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,8 +174,19 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: Pruning grouping set id not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -212,16 +227,20 @@ AAAAAAAAEAAAAAAA AAAAAAAAHAAAAAAA AAAAAAAAIAAAAAAA AAAAAAAAKAAAAAAA -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s_store_id, GROUPING__ID from store group by 
s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s_store_id, GROUPING__ID from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -255,10 +274,30 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -266,9 +305,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_if_expr.q.out ql/src/test/results/clientpositive/llap/vector_if_expr.q.out index 555340d..47d5efd 100644 --- ql/src/test/results/clientpositive/llap/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/llap/vector_if_expr.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,28 +26,72 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprAndExpr[-1:boolean](SelectColumnIsTrue[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 4587 Data size: 13704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IfExprStringScalarStringScalar[12:String] + vectorized: true Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out index e939c67..a0e6e38 100644 --- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out @@ -181,16 +181,20 @@ POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographic POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE 
[(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] Warning: Map Join MAPJOIN[13][bigTable=store_sales] in task 'Map 2' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -211,10 +215,24 @@ STAGE PLANS: Statistics: Num rows: 200 Data size: 74200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 200 Data size: 74200 Basic stats: COMPLETE Column stats: NONE value expressions: cd_demo_sk (type: int), cd_marital_status (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -226,36 +244,93 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Not empty key IS false + vectorized: true outputColumnNames: _col0, _col2, _col16 input vertices: 0 Map 1 Statistics: Num rows: 200000 Data size: 92055200 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean] FilterStringGroupColEqualStringScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean] FilterStringGroupColEqualStringScalar[-1:boolean])) + vectorized: true predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) Statistics: Num rows: 100000 Data size: 46027600 Basic 
stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression[3:long]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index ca07200..204bc97 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c 
where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,6 +89,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -104,6 +116,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -122,12 +142,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -171,6 +195,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -195,6 +227,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -249,12 +289,16 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY 
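The operator blocks added throughout these plans follow one pattern: each vectorized operator reports "native" together with "nativeConditionsMet" / "nativeConditionsNotMet", which are named boolean preconditions rendered as "<description> IS <true|false>". An operator is planned in a specialized native form only when every condition holds; otherwise the plan falls back to a generic vectorized operator, which is why the same ReduceSink appears as VectorReduceSinkMultiKeyOperator when "Uniform Hash IS true" but as the plain VectorReduceSinkOperator when "Uniform Hash IS false". The sketch below is a minimal, hypothetical model of that bookkeeping, written only to reproduce the rendering seen in these golden files; the class and method names are invented for illustration and are not Hive's actual API.

import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;

/**
 * Minimal sketch (hypothetical, not Hive's API) of the bookkeeping behind
 * the nativeConditionsMet / nativeConditionsNotMet lines in the plans above.
 */
public class NativeConditionSketch {

  /** One named precondition, rendered as "<description> IS <true|false>". */
  private static final class Condition {
    final String description;
    final boolean met;

    Condition(String description, boolean met) {
      this.description = description;
      this.met = met;
    }
  }

  private final List<Condition> conditions = new ArrayList<>();

  /** Record the outcome of one precondition check. */
  public void check(String description, boolean met) {
    conditions.add(new Condition(description, met));
  }

  /** The operator is native only when every recorded condition holds. */
  public boolean isNative() {
    for (Condition c : conditions) {
      if (!c.met) {
        return false;
      }
    }
    return true;
  }

  /** Render the met (or not-met) subset as the comma-separated list shown in EXPLAIN. */
  public String render(boolean met) {
    StringJoiner joiner = new StringJoiner(", ");
    for (Condition c : conditions) {
      if (c.met == met) {
        joiner.add(c.description + " IS " + c.met);
      }
    }
    return joiner.toString();
  }

  public static void main(String[] args) {
    // Mirrors the ReduceSink blocks above: every condition except
    // "Uniform Hash" holds, so the operator is not native.
    NativeConditionSketch rs = new NativeConditionSketch();
    rs.check("hive.vectorized.execution.reducesink.new.enabled", true);
    rs.check("No TopN", true);
    rs.check("No DISTINCT columns", true);
    rs.check("Uniform Hash", false);
    System.out.println("native: " + rs.isNative());
    System.out.println("nativeConditionsMet: " + rs.render(true));
    System.out.println("nativeConditionsNotMet: " + rs.render(false));
  }
}

Run as-is, this prints "native: false" with "Uniform Hash IS false" as the only unmet condition, the same shape as the fallback ReduceSink blocks in the plans above.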
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -302,6 +346,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -322,6 +374,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -340,12 +400,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -378,6 +442,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -409,6 +481,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -427,12 +507,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -480,6 +564,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, 
llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -500,6 +592,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -518,12 +618,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -571,6 +675,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -591,6 +703,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -609,12 +729,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -662,6 +786,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: 
TableScan @@ -682,6 +814,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -700,12 +840,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -738,6 +882,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -773,6 +925,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -791,12 +951,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -829,6 +993,14 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -864,6 +1036,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_interval_1.q.out ql/src/test/results/clientpositive/llap/vector_interval_1.q.out index d8003ba..04e77df 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_1.q.out @@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] PREHOOK: query: -- constants/cast from string -explain +explain vectorization detail select str1, interval '1-2' year to month, interval_year_month(str1), @@ -47,13 +47,17 @@ select from vector_interval_1 order by str1 PREHOOK: type: QUERY POSTHOOK: query: -- constants/cast from string -explain +explain vectorization detail select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -74,23 +78,61 @@ STAGE PLANS: Select Operator expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) outputColumnNames: _col0, _col2, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], CastStringToIntervalYearMonth[4:interval_year_month], CastStringToIntervalDayTime[5:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time) outputColumnNames: _col0, 
_col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], ConstantVectorExpression[3:long], IdentityExpression[1:interval_year_month], ConstantVectorExpression[4:interval_day_time], IdentityExpression[2:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -122,7 +164,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: -- interval arithmetic -explain +explain vectorization detail select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -134,7 +176,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- interval arithmetic -explain +explain vectorization detail select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -145,6 +187,10 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -165,23 +211,61 @@ STAGE PLANS: Select Operator expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date], IntervalYearMonthColAddIntervalYearMonthColumn[6:long](CastStringToIntervalYearMonth[4:interval_year_month] CastStringToIntervalYearMonth[5:interval_year_month]), IntervalYearMonthScalarAddIntervalYearMonthColumn[5:long](CastStringToIntervalYearMonth[4:interval_year_month]), IntervalYearMonthColSubtractIntervalYearMonthColumn[8:long](CastStringToIntervalYearMonth[4:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthScalarSubtractIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[4:interval_year_month]) + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], ConstantVectorExpression[5:long], IdentityExpression[1:interval_year_month], IdentityExpression[2:interval_year_month], ConstantVectorExpression[6:long], IdentityExpression[3:interval_year_month], IdentityExpression[4:interval_year_month] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -220,7 +304,7 @@ POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -231,7 +315,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt, interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, @@ -242,6 +326,10 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -262,23 +350,61 @@ STAGE PLANS: Select Operator expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time) outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date], IntervalDayTimeColAddIntervalDayTimeColumn[6:interval_day_time](CastStringToIntervalDayTime[4:interval_day_time] CastStringToIntervalDayTime[5:interval_day_time]), IntervalDayTimeScalarAddIntervalDayTimeColumn[5:timestamp](CastStringToIntervalDayTime[4:interval_day_time]), IntervalDayTimeColSubtractIntervalDayTimeColumn[8:interval_day_time](CastStringToIntervalDayTime[4:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeScalarSubtractIntervalDayTimeColumn[7:timestamp](CastStringToIntervalDayTime[4:interval_day_time]) + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], ConstantVectorExpression[5:interval_day_time], IdentityExpression[1:interval_day_time], IdentityExpression[2:interval_day_time], ConstantVectorExpression[6:interval_day_time], IdentityExpression[3:interval_day_time], IdentityExpression[4:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -318,7 +444,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL 
NULL 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: -- date-interval arithmetic -explain +explain vectorization detail select dt, dt + interval '1-2' year to month, @@ -336,7 +462,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- date-interval arithmetic -explain +explain vectorization detail select dt, dt + interval '1-2' year to month, @@ -353,6 +479,10 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -373,23 +503,61 @@ STAGE PLANS: Select Operator expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date], DateColAddIntervalYearMonthScalar[4:long], DateColAddIntervalYearMonthColumn[6:long](CastStringToIntervalYearMonth[5:interval_year_month]), IntervalYearMonthScalarAddDateColumn[7:long], IntervalYearMonthColAddDateColumn[8:long](CastStringToIntervalYearMonth[5:interval_year_month]), DateColSubtractIntervalYearMonthScalar[9:long], DateColSubtractIntervalYearMonthColumn[10:long](CastStringToIntervalYearMonth[5:interval_year_month]), DateColAddIntervalDayTimeScalar[11:timestamp], DateColAddIntervalDayTimeColumn[13:timestamp](CastStringToIntervalDayTime[12:interval_day_time]), IntervalDayTimeScalarAddDateColumn[14:timestamp], IntervalDayTimeColAddDateColumn[15:interval_day_time](CastStringToIntervalDayTime[12:interval_day_time]), DateColSubtractIntervalDayTimeScalar[16:timestamp], DateColSubtractIntervalDayTimeColumn[17:timestamp](CastStringToIntervalDayTime[12:interval_day_time]) + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: 
timestamp), _col11 (type: timestamp), _col12 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], IdentityExpression[2:date], IdentityExpression[3:date], IdentityExpression[4:date], IdentityExpression[5:date], IdentityExpression[6:date], IdentityExpression[7:timestamp], IdentityExpression[8:timestamp], IdentityExpression[9:timestamp], IdentityExpression[10:timestamp], IdentityExpression[11:timestamp], IdentityExpression[12:timestamp] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -441,7 +609,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 PREHOOK: query: -- timestamp-interval arithmetic -explain +explain vectorization detail select ts, ts + interval '1-2' year to month, @@ -459,7 +627,7 @@ select from vector_interval_1 order by ts PREHOOK: type: QUERY POSTHOOK: query: -- timestamp-interval arithmetic -explain +explain vectorization detail select ts, ts + interval '1-2' year to month, @@ -476,6 +644,10 @@ select ts - interval_day_time(str2) from vector_interval_1 order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -496,23 +668,61 @@ STAGE PLANS: Select Operator expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 
02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], TimestampColAddIntervalYearMonthScalar[4:timestamp], TimestampColAddIntervalYearMonthColumn[6:timestamp](CastStringToIntervalYearMonth[5:interval_year_month]), IntervalYearMonthScalarAddTimestampColumn[7:timestamp], IntervalYearMonthColAddTimestampColumn[8:timestamp](CastStringToIntervalYearMonth[5:interval_year_month]), TimestampColSubtractIntervalYearMonthScalar[9:timestamp], TimestampColSubtractIntervalYearMonthColumn[10:timestamp](CastStringToIntervalYearMonth[5:interval_year_month]), TimestampColAddIntervalDayTimeScalar[11:timestamp], TimestampColAddIntervalDayTimeColumn[13:timestamp](CastStringToIntervalDayTime[12:interval_day_time]), IntervalDayTimeScalarAddTimestampColumn[14:timestamp], IntervalDayTimeColAddTimestampColumn[15:timestamp](CastStringToIntervalDayTime[12:interval_day_time]), TimestampColSubtractIntervalDayTimeScalar[16:timestamp], TimestampColSubtractIntervalDayTimeColumn[17:timestamp](CastStringToIntervalDayTime[12:interval_day_time]) + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), 
VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:timestamp], IdentityExpression[2:timestamp], IdentityExpression[3:timestamp], IdentityExpression[4:timestamp], IdentityExpression[5:timestamp], IdentityExpression[6:timestamp], IdentityExpression[7:timestamp], IdentityExpression[8:timestamp], IdentityExpression[9:timestamp], IdentityExpression[10:timestamp], IdentityExpression[11:timestamp], IdentityExpression[12:timestamp] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -564,7 +774,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 PREHOOK: query: -- timestamp-timestamp arithmetic -explain +explain vectorization detail select ts, ts - ts, @@ -573,7 +783,7 @@ select from vector_interval_1 order by ts PREHOOK: type: QUERY POSTHOOK: query: -- timestamp-timestamp arithmetic -explain +explain vectorization detail select ts, ts - ts, @@ -581,6 +791,10 @@ select ts - timestamp '2001-01-01 01:02:03' from vector_interval_1 order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -601,23 +815,61 @@ STAGE PLANS: Select Operator expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], TimestampColSubtractTimestampColumn[4:interval_day_time], TimestampScalarSubtractTimestampColumn[5:interval_day_time], TimestampColSubtractTimestampScalar[6:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442
Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:interval_day_time], IdentityExpression[2:interval_day_time], IdentityExpression[3:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -651,7 +903,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL NULL NULL NULL 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: -- date-date arithmetic -explain +explain vectorization detail select dt, dt - dt, @@ -660,7 +912,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- date-date arithmetic -explain +explain vectorization detail select dt, dt - dt, @@ -668,6 +920,10 @@ select dt - date '2001-01-01' from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -688,23 +944,61 @@ STAGE PLANS: Select Operator expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date], DateColSubtractDateColumn[4:interval_day_time], DateScalarSubtractDateColumn[5:interval_day_time], DateColSubtractDateScalar[6:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:interval_day_time], IdentityExpression[2:interval_day_time], IdentityExpression[3:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -738,7 +1032,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL NULL NULL NULL 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: -- date-timestamp arithmetic -explain +explain vectorization detail select dt, ts - dt, @@ -750,7 +1044,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- date-timestamp arithmetic -explain +explain vectorization detail select dt, ts - dt, @@ -761,6 +1055,10 @@ select date '2001-01-01' - ts from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -781,23 +1079,61 @@ STAGE PLANS: Select Operator expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date], TimestampColSubtractDateColumn[4:interval_day_time], TimestampScalarSubtractDateColumn[5:interval_day_time], TimestampColSubtractDateScalar[6:interval_day_time], DateColSubtractTimestampColumn[7:interval_day_time], DateColSubtractTimestampScalar[8:interval_day_time], 
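All of the subtraction plans above type their results as interval_day_time, and the result sections render those values in day-to-second form such as 0 00:00:00.000000000. A worked example of the arithmetic and formatting, using java.time (Java 9+ Duration part accessors) rather than Hive's writable types; illustrative only:

import java.time.Duration;
import java.time.LocalDateTime;

// Subtracting two timestamps yields a day-to-second interval. With the test
// data's ts = 2001-01-01 01:02:03 and ts + interval '1 2:3:4', the difference
// prints as "1 02:03:04.000000000" in the same layout the result rows use.
public class IntervalDayTimeDemo {
  public static void main(String[] args) {
    LocalDateTime later = LocalDateTime.parse("2001-01-02T03:05:07");
    LocalDateTime earlier = LocalDateTime.parse("2001-01-01T01:02:03");
    Duration d = Duration.between(earlier, later);
    long days = d.toDays();
    d = d.minusDays(days);
    System.out.printf("%d %02d:%02d:%02d.%09d%n",
        days, d.toHours(), d.toMinutesPart(), d.toSecondsPart(), d.toNanosPart());
  }
}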
DateScalarSubtractTimestampColumn[9:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:interval_day_time], IdentityExpression[2:interval_day_time], IdentityExpression[3:interval_day_time], IdentityExpression[4:interval_day_time], IdentityExpression[5:interval_day_time], IdentityExpression[6:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_interval_2.q.out ql/src/test/results/clientpositive/llap/vector_interval_2.q.out index 23a977e..9bf749b 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_2.q.out @@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION [] POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION [] PREHOOK: query: -- interval comparisons in select clause -explain +explain vectorization detail select str1, -- Should all be true @@ -78,7 +78,7 @@ from vector_interval_2 order by str1 PREHOOK: type: QUERY POSTHOOK: query: -- interval comparisons in select clause -explain +explain 
vectorization detail select str1, -- Should all be true @@ -110,6 +110,10 @@ select interval '1-2' year to month != interval_year_month(str2) from vector_interval_2 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -130,23 +134,61 @@ STAGE PLANS: Select Operator expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], LongColEqualLongColumn[8:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessEqualLongColumn[9:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessEqualLongColumn[10:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessLongColumn[11:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterEqualLongColumn[12:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterEqualLongColumn[13:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), 
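A year-to-month interval is stored as a single month count, which is why every comparison in this plan lowers to a plain long comparison writing 0 or 1 into a long output column (for example LongColLessLongColumn[11:long]). A minimal sketch of that encoding and comparison; the class name and the array-based batch are assumptions for illustration:

// interval_year_month is a month count, so interval '1-2' year to month is
// 14 months. Comparisons then reduce to long comparisons that emit 0/1 into
// a long output column, as the LongCol*LongColumn entries above show.
public class YearMonthCompareSketch {
  static long monthsOf(int years, int months) {
    return 12L * years + months;
  }

  /** Elementwise a < b over two long-encoded interval columns. */
  static void lessThan(long[] a, long[] b, long[] out, int size) {
    for (int i = 0; i < size; i++) {
      out[i] = a[i] < b[i] ? 1L : 0L;  // boolean result as 0/1 long
    }
  }

  public static void main(String[] args) {
    long[] a = { monthsOf(1, 2), monthsOf(1, 3) };  // 1-2, 1-3
    long[] b = { monthsOf(1, 3), monthsOf(1, 2) };  // 1-3, 1-2
    long[] out = new long[2];
    lessThan(a, b, out, 2);
    System.out.println(out[0] + " " + out[1]);      // prints "1 0"
  }
}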
LongColGreaterLongColumn[14:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColNotEqualLongColumn[15:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthColEqualIntervalYearMonthScalar[16:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessEqualIntervalYearMonthScalar[17:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessEqualIntervalYearMonthScalar[18:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessIntervalYearMonthScalar[19:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterEqualIntervalYearMonthScalar[20:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterEqualIntervalYearMonthScalar[21:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterIntervalYearMonthScalar[22:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColNotEqualIntervalYearMonthScalar[23:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarEqualIntervalYearMonthColumn[24:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessEqualIntervalYearMonthColumn[25:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessEqualIntervalYearMonthColumn[26:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessIntervalYearMonthColumn[27:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[28:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[29:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterIntervalYearMonthColumn[30:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarNotEqualIntervalYearMonthColumn[31:long](CastStringToIntervalYearMonth[6:interval_year_month]) + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:boolean], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean], IdentityExpression[9:boolean], IdentityExpression[10:boolean], IdentityExpression[11:boolean], IdentityExpression[12:boolean], IdentityExpression[13:boolean], IdentityExpression[14:boolean], IdentityExpression[15:boolean], IdentityExpression[16:boolean], IdentityExpression[17:boolean], IdentityExpression[18:boolean], IdentityExpression[19:boolean], IdentityExpression[20:boolean], IdentityExpression[21:boolean], IdentityExpression[22:boolean], IdentityExpression[23:boolean], IdentityExpression[24:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -227,7 +269,7 @@ POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1-2 true true true true true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select str1, -- Should all be false @@ -253,7 +295,7 @@ select interval '1-2' year to month != interval_year_month(str1) from vector_interval_2 order by str1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain 
vectorization detail select str1, -- Should all be false @@ -279,6 +321,10 @@ select interval '1-2' year to month != interval_year_month(str1) from vector_interval_2 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -299,23 +345,61 @@ STAGE PLANS: Select Operator expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean) outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], LongColNotEqualLongColumn[8:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthColLessEqualIntervalYearMonthScalar[9:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessIntervalYearMonthScalar[10:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarNotEqualIntervalYearMonthColumn[11:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[12:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterIntervalYearMonthColumn[13:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessEqualIntervalYearMonthColumn[14:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessIntervalYearMonthColumn[15:long](CastStringToIntervalYearMonth[6:interval_year_month]), LongColGreaterEqualLongColumn[16:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterLongColumn[17:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessEqualLongColumn[18:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessLongColumn[19:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthColNotEqualIntervalYearMonthScalar[20:long](CastStringToIntervalYearMonth[6:interval_year_month]), 
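Each of these diffs swaps a bare explain for explain vectorization detail, the mode that emits the PLAN VECTORIZATION block and the per-operator vectorization sections being added throughout this file. A minimal sketch of issuing that explain mode programmatically; it assumes only the standard Hive JDBC driver on the classpath, and the HiveServer2 URL and the query text are placeholders:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ExplainVectorizationDemo {
  public static void main(String[] args) throws Exception {
    // Placeholder URL; point this at a real HiveServer2 instance.
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(
             "EXPLAIN VECTORIZATION DETAIL "
                 + "SELECT str1 FROM vector_interval_2 ORDER BY str1")) {
      // Each row of the result set is one line of the annotated plan,
      // including the PLAN VECTORIZATION and operator-level sections.
      while (rs.next()) {
        System.out.println(rs.getString(1));
      }
    }
  }
}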
IntervalYearMonthColGreaterEqualIntervalYearMonthScalar[21:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterIntervalYearMonthScalar[22:long](CastStringToIntervalYearMonth[6:interval_year_month]) + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:boolean], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[1:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean], IdentityExpression[9:boolean], IdentityExpression[10:boolean], IdentityExpression[6:boolean], IdentityExpression[11:boolean], IdentityExpression[12:boolean], IdentityExpression[13:boolean], IdentityExpression[14:boolean], IdentityExpression[15:boolean], IdentityExpression[11:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE File 
Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -384,7 +468,7 @@ POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1-2 false false false false false false false false false false false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select str3, -- Should all be true @@ -416,7 +500,7 @@ select interval '1 2:3:4' day to second != interval_day_time(str4) from vector_interval_2 order by str3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select str3, -- Should all be true @@ -448,6 +532,10 @@ select interval '1 2:3:4' day to second != interval_day_time(str4) from vector_interval_2 order by str3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -468,23 +556,61 @@ STAGE PLANS: Select Operator expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
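In the reducer of the preceding plan, duplicated value columns are reused rather than recomputed: VALUE._col0, VALUE._col5 and VALUE._col10 each appear twice, and the selectExpressions correspondingly repeat IdentityExpression[1:boolean], IdentityExpression[6:boolean] and IdentityExpression[11:boolean]. That reuse is effectively free, because an identity projection only binds an output to an existing column; a sketch of the idea under a simplified batch type, not Hive's class:

// Sketch of an identity projection: it records which physical column backs
// a logical output and moves no data, which is why a plan can repeat
// IdentityExpression[1:boolean] for duplicated output columns at no cost.
public final class IdentityExpressionSketch {
  private final int column;  // physical column backing this output

  public IdentityExpressionSketch(int column) {
    this.column = column;
  }

  /** No-op: downstream operators simply read outputColumn() directly. */
  public void evaluate(long[][] batch) {
    // intentionally empty
  }

  public int outputColumn() {
    return column;
  }
}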
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:string], IntervalDayTimeColEqualIntervalDayTimeColumn[8:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeColumn[9:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeColumn[10:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeColumn[11:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeColumn[12:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeColumn[13:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeColumn[14:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColNotEqualIntervalDayTimeColumn[15:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColEqualIntervalDayTimeScalar[16:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeScalar[17:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeScalar[18:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeScalar[19:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeScalar[20:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeScalar[21:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeScalar[22:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColNotEqualIntervalDayTimeScalar[23:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarEqualIntervalDayTimeColumn[24:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessEqualIntervalDayTimeColumn[25:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessEqualIntervalDayTimeColumn[26:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessIntervalDayTimeColumn[27:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[28:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[29:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterIntervalDayTimeColumn[30:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarNotEqualIntervalDayTimeColumn[31:long](CastStringToIntervalDayTime[6:interval_day_time]) + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + 
native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:boolean], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean], IdentityExpression[9:boolean], IdentityExpression[10:boolean], IdentityExpression[11:boolean], IdentityExpression[12:boolean], IdentityExpression[13:boolean], IdentityExpression[14:boolean], IdentityExpression[15:boolean], IdentityExpression[16:boolean], IdentityExpression[17:boolean], IdentityExpression[18:boolean], IdentityExpression[19:boolean], IdentityExpression[20:boolean], IdentityExpression[21:boolean], IdentityExpression[22:boolean], 
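Unlike the year-month comparisons, the day-to-second comparisons in this plan use dedicated IntervalDayTimeCol* classes, because a day-time interval carries a seconds component plus a nanoseconds component and cannot be compared as one long. A sketch of the two-field comparison such classes perform; the field layout is an assumption for illustration:

// Day-time intervals carry (seconds, nanos), so they get dedicated
// IntervalDayTimeCol* comparison classes instead of the generic LongCol*
// ones. Compare seconds first, then break ties on nanoseconds.
public class DayTimeCompareSketch {
  static int compare(long seconds1, int nanos1, long seconds2, int nanos2) {
    if (seconds1 != seconds2) {
      return Long.compare(seconds1, seconds2);
    }
    return Integer.compare(nanos1, nanos2);
  }

  public static void main(String[] args) {
    // interval '1 2:3:4' = 93784 seconds; '1 2:3:5' is one second later.
    System.out.println(compare(93784, 0, 93785, 0) < 0);  // prints "true"
  }
}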
IdentityExpression[23:boolean], IdentityExpression[24:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -565,7 +691,7 @@ POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 2:3:4 true true true true true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select str3, -- Should all be false @@ -591,7 +717,7 @@ select interval '1 2:3:4' day to second != interval_day_time(str3) from vector_interval_2 order by str3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select str3, -- Should all be false @@ -617,6 +743,10 @@ select interval '1 2:3:4' day to second != interval_day_time(str3) from vector_interval_2 order by str3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -637,23 +767,61 @@ STAGE PLANS: Select Operator expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < 1 02:03:04.000000000) (type: boolean), (1 02:03:04.000000000 <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean) outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:string], IntervalDayTimeColNotEqualIntervalDayTimeColumn[8:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeScalar[9:long](CastStringToIntervalDayTime[6:interval_day_time]), 
IntervalDayTimeColLessIntervalDayTimeScalar[10:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarNotEqualIntervalDayTimeColumn[11:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[12:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterIntervalDayTimeColumn[13:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessEqualIntervalDayTimeColumn[14:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessIntervalDayTimeColumn[15:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeColumn[16:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeColumn[17:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeColumn[18:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeColumn[19:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColNotEqualIntervalDayTimeScalar[20:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeScalar[21:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeScalar[22:long](CastStringToIntervalDayTime[6:interval_day_time]) + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: 
boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:boolean], IdentityExpression[2:boolean], IdentityExpression[3:boolean], IdentityExpression[4:boolean], IdentityExpression[5:boolean], IdentityExpression[1:boolean], IdentityExpression[6:boolean], IdentityExpression[7:boolean], IdentityExpression[8:boolean], IdentityExpression[9:boolean], IdentityExpression[10:boolean], IdentityExpression[6:boolean], IdentityExpression[11:boolean], IdentityExpression[12:boolean], IdentityExpression[13:boolean], IdentityExpression[14:boolean], IdentityExpression[15:boolean], IdentityExpression[11:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -723,7 +891,7 @@ POSTHOOK: Input: default@vector_interval_2 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 2:3:4 false false false false false false false false false false false false false false false false false false PREHOOK: query: -- interval expressions in predicates -explain +explain vectorization detail select ts from vector_interval_2 where interval_year_month(str1) = interval_year_month(str1) @@ -749,7 +917,7 @@ where order by ts PREHOOK: type: QUERY POSTHOOK: query: -- interval expressions in predicates -explain +explain vectorization detail select ts from vector_interval_2 where interval_year_month(str1) = interval_year_month(str1) @@ -774,6 +942,10 @@ where and interval '1-3' year to month > interval_year_month(str1) order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -792,27 +964,71 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColNotEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColLessEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) 
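The predicateExpression entries here are tagged [-1:boolean] because a vectorized filter produces no output column at all: it narrows the batch's selection vector in place, and FilterExprAndExpr obtains AND semantics simply by running its children in sequence, each over the rows its predecessors kept. A sketch of that selection-vector protocol with simplified columns, not Hive's VectorizedRowBatch:

// Vectorized filters have no output column (the plan prints [-1:boolean]);
// they shrink the selection vector in place. An AND of filters is just
// sequential application: each child sees only the surviving rows.
public class FilterSketch {
  /** Keep only selected rows where col[i] != 0; returns the new size. */
  static int filterNonZero(long[] col, int[] sel, int size) {
    int newSize = 0;
    for (int j = 0; j < size; j++) {
      int i = sel[j];
      if (col[i] != 0) {
        sel[newSize++] = i;  // compact surviving row ids to the front
      }
    }
    return newSize;
  }

  public static void main(String[] args) {
    long[] a = { 1, 0, 1, 1 };
    long[] b = { 1, 1, 0, 1 };
    int[] sel = { 0, 1, 2, 3 };
    int size = 4;
    size = filterNonZero(a, sel, size);  // rows 0, 2, 3 survive
    size = filterNonZero(b, sel, size);  // rows 0, 3 survive: AND semantics
    System.out.println(size);            // prints 2
  }
}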
FilterLongColLessLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColGreaterEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColGreaterLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterIntervalYearMonthColEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColNotEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColLessEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColLessIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColGreaterEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColGreaterIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarNotEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarLessEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarLessIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarGreaterIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month])) + vectorized: true predicate: ((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator 
key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -878,7 +1094,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ts from vector_interval_2 where interval_day_time(str3) = interval_day_time(str3) @@ -903,7 +1119,7 @@ where and interval '1 2:3:5' day to second > interval_day_time(str3) order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where interval_day_time(str3) = interval_day_time(str3) @@ -928,6 +1144,10 @@ where and interval '1 2:3:5' day to second > interval_day_time(str3) order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -946,27 +1166,71 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterIntervalDayTimeColEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColNotEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) 
FilterIntervalDayTimeColLessEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColLessIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColGreaterEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColGreaterIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColNotEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColLessEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColLessIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColGreaterEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColGreaterIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarNotEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarLessEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarLessIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarGreaterIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time])) + vectorized: true predicate: ((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: 
timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1032,7 +1296,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -1052,7 +1316,7 @@ where and dt != dt + interval '1-2' year to month order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -1072,6 +1336,10 @@ where and dt != dt + interval '1-2' year to month order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1090,27 +1358,71 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprAndExpr[-1:boolean](FilterDateScalarEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateScalarLessEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateScalarGreaterEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateColEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateColLessEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateColGreaterEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterLongColNotEqualLongColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateScalarEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateScalarLessEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateScalarGreaterEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateColEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateColLessEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateColGreaterEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterLongColNotEqualLongColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long])) + vectorized: true predicate: ((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 = (dt + 1-2)) and (2002-03-01 <= (dt + 1-2)) and (2002-03-01 >= (dt + 1-2)) and ((dt + 1-2) = 2002-03-01) and ((dt + 1-2) <= 2002-03-01) and ((dt + 1-2) >= 2002-03-01) and (dt <> (dt + 1-2))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1166,7 +1478,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -1191,7 +1503,7 @@ where and ts > ts - interval '1' year order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -1216,6 +1528,10 @@ where and ts > ts - interval '1' year order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1234,27 +1550,71 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterTimestampScalarEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarGreaterEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarLessTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarGreaterTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColNotEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) 
FilterTimestampColGreaterTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampColumn[-1:boolean](TimestampColSubtractIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterTimestampColumn[-1:boolean](TimestampColSubtractIntervalYearMonthScalar[6:timestamp])) + vectorized: true predicate: ((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2)) and (2002-03-01 01:02:03.0 >= (ts + 1-2)) and (2002-04-01 01:02:03.0 <> (ts + 1-2)) and (2002-02-01 01:02:03.0 < (ts + 1-2)) and (2002-04-01 01:02:03.0 > (ts + 1-2)) and ((ts + 1-2) = 2002-03-01 01:02:03.0) and ((ts + 1-2) >= 2002-03-01 01:02:03.0) and ((ts + 1-2) <= 2002-03-01 01:02:03.0) and ((ts + 1-2) <> 2002-04-01 01:02:03.0) and ((ts + 1-2) > 2002-02-01 01:02:03.0) and ((ts + 1-2) < 2002-04-01 01:02:03.0) and (ts = (ts + 0-0)) and (ts <> (ts + 1-0)) and (ts <= (ts + 1-0)) and (ts < (ts + 1-0)) and (ts >= (ts - 1-0)) and (ts > (ts - 1-0))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: 
COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1321,7 +1681,7 @@ POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 PREHOOK: query: -- day to second expressions in predicate -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1347,7 +1707,7 @@ where order by ts PREHOOK: type: QUERY POSTHOOK: query: -- day to second expressions in predicate -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1372,6 +1732,10 @@ where and ts > dt - interval '0 1:2:4' day to second order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1390,27 +1754,71 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterTimestampScalarEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarNotEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterEqualTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampScalar[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampScalar[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp])) + vectorized: true predicate: ((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 
01:02:04.000000000)) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000)) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0) and (ts = (dt + 0 01:02:03.000000000)) and (ts <> (dt + 0 01:02:04.000000000)) and (ts <= (dt + 0 01:02:03.000000000)) and (ts < (dt + 0 01:02:04.000000000)) and (ts >= (dt - 0 01:02:03.000000000)) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1476,7 +1884,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ 
-1501,7 +1909,7 @@ where and ts > ts - interval '1' day order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1526,6 +1934,10 @@ where and ts > ts - interval '1' day order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1544,27 +1956,71 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterTimestampScalarEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterEqualTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampScalar[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampScalar[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp])) + vectorized: true predicate: ((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000)) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0) and (ts = (ts + 0 
00:00:00.000000000)) and (ts <> (ts + 1 00:00:00.000000000)) and (ts <= (ts + 1 00:00:00.000000000)) and (ts < (ts + 1 00:00:00.000000000)) and (ts >= (ts - 1 00:00:00.000000000)) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out index 13a8b35..014a546 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out @@ -36,7 +36,7 @@ POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION [(unique_timestamps) POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ] tsval tsval PREHOOK: query: -- interval year-month arithmetic -explain +explain vectorization detail select dateval, dateval - interval '2-2' year to month, @@ -49,7 +49,7 @@ from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY POSTHOOK: query: -- interval year-month arithmetic -explain +explain vectorization detail select dateval, 
dateval - interval '2-2' year to month, @@ -62,6 +62,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -82,23 +86,61 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), (dateval - 2-2) (type: date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + -2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColSubtractIntervalYearMonthScalar[2:long], DateColSubtractIntervalYearMonthScalar[3:long], DateColAddIntervalYearMonthScalar[4:long], DateColAddIntervalYearMonthScalar[5:long], IntervalYearMonthScalarAddDateColumn[6:long], IntervalYearMonthScalarAddDateColumn[7:long] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], IdentityExpression[2:date], IdentityExpression[3:date], IdentityExpression[4:date], IdentityExpression[5:date], IdentityExpression[6:date] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE 
Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -188,7 +230,7 @@ dateval c1 c2 c3 c4 c5 c6 9075-06-13 9073-04-13 9077-08-13 9077-08-13 9073-04-13 9073-04-13 9077-08-13 9209-11-11 9207-09-11 9212-01-11 9212-01-11 9207-09-11 9207-09-11 9212-01-11 9403-01-09 9400-11-09 9405-03-09 9405-03-09 9400-11-09 9400-11-09 9405-03-09 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dateval, dateval - date '1999-06-07', @@ -197,7 +239,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dateval, dateval - date '1999-06-07', @@ -207,6 +249,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -227,23 +273,61 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), (dateval - 1999-06-07) (type: interval_day_time), (1999-06-07 - dateval) (type: interval_day_time), (dateval - dateval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColSubtractDateScalar[2:timestamp], DateScalarSubtractDateColumn[3:timestamp], DateColSubtractDateColumn[4:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:interval_day_time], IdentityExpression[2:interval_day_time], IdentityExpression[3:interval_day_time] + vectorized: true 
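[Editor's note: the DateColSubtractDateScalar / DateScalarSubtractDateColumn / DateColSubtractDateColumn expressions in the Select Vectorization above implement date-minus-date arithmetic whose SQL result type is interval_day_time, as the result rows further below show (for example 2584462 00:00:00.000000000). A rough standalone illustration follows, assuming plain calendar-day subtraction; the class and helper names are invented, Hive's real rendering comes from HiveIntervalDayTime, and rows such as 2633556 01:00:00.000000000 pick up an hour component from daylight-saving shifts that this sketch deliberately ignores.]

import java.time.LocalDate;
import java.time.temporal.ChronoUnit;

public class DateDiffSketch {
    // Render a whole-day difference in Hive's day-to-second interval format.
    static String asDayToSecond(long days) {
        return days + " 00:00:00.000000000"; // DATE values carry no time of day
    }

    public static void main(String[] args) {
        LocalDate dateval = LocalDate.parse("9075-06-13");
        LocalDate scalar  = LocalDate.parse("1999-06-07");
        long days = ChronoUnit.DAYS.between(scalar, dateval);
        // Expected to match the golden row above: 2584462 00:00:00.000000000
        System.out.println(asDayToSecond(days));
    }
}

[End of note; the plan output continues below.]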
Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -327,7 +411,7 @@ dateval c1 c2 c3 9075-06-13 2584462 00:00:00.000000000 -2584462 00:00:00.000000000 0 00:00:00.000000000 9209-11-11 2633556 01:00:00.000000000 -2633556 01:00:00.000000000 0 00:00:00.000000000 9403-01-09 2704106 01:00:00.000000000 -2704106 01:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tsval, tsval - interval '2-2' year to month, @@ -339,7 +423,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tsval, tsval - interval '2-2' year to month, @@ -352,6 +436,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -372,23 +460,61 @@ STAGE PLANS: Select Operator expressions: tsval (type: timestamp), (tsval - 2-2) (type: timestamp), (tsval - -2-2) (type: timestamp), (tsval + 2-2) (type: timestamp), (tsval + -2-2) (type: timestamp), (-2-2 + tsval) (type: timestamp), (2-2 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:timestamp], TimestampColSubtractIntervalYearMonthScalar[2:timestamp], TimestampColSubtractIntervalYearMonthScalar[3:timestamp], TimestampColAddIntervalYearMonthScalar[4:timestamp], TimestampColAddIntervalYearMonthScalar[5:timestamp], IntervalYearMonthScalarAddTimestampColumn[6:timestamp], IntervalYearMonthScalarAddTimestampColumn[7:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:timestamp], IdentityExpression[2:timestamp], IdentityExpression[3:timestamp], IdentityExpression[4:timestamp], IdentityExpression[5:timestamp], IdentityExpression[6:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -478,7 +604,7 @@ tsval c1 c2 c3 c4 c5 c6 9075-06-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9209-11-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9212-01-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9403-01-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 9405-03-09 18:12:33.547 9400-11-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -486,7 +612,7 @@ from interval_arithmetic_1 order by interval '2-2' year to month + interval '3-3' year to month limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -495,6 +621,10 @@ order by interval '2-2' year to month + interval '3-3' year to month limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -513,25 +643,67 @@ STAGE PLANS: alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: 5-5 (type: interval_year_month), -1-1 (type: interval_year_month) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[0:long], ConstantVectorExpression[1:long] + vectorized: true Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -566,7 +738,7 @@ c0 c1 5-5 -1-1 5-5 -1-1 PREHOOK: query: -- interval day-time arithmetic -explain +explain vectorization detail select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -579,7 +751,7 @@ from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY POSTHOOK: query: -- interval day-time arithmetic -explain +explain vectorization detail select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -592,6 +764,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -612,23 +788,61 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), (dateval - 99 11:22:33.123456789) (type: timestamp), (dateval - -99 11:22:33.123456789) (type: timestamp), (dateval + 99 11:22:33.123456789) (type: timestamp), (dateval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + dateval) (type: timestamp), (99 11:22:33.123456789 + dateval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColSubtractIntervalDayTimeScalar[2:timestamp], DateColSubtractIntervalDayTimeScalar[3:timestamp], DateColAddIntervalDayTimeScalar[4:timestamp], DateColAddIntervalDayTimeScalar[5:timestamp], IntervalDayTimeScalarAddDateColumn[6:timestamp], IntervalDayTimeScalarAddDateColumn[7:timestamp] + 
vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:timestamp], IdentityExpression[2:timestamp], IdentityExpression[3:timestamp], IdentityExpression[4:timestamp], IdentityExpression[5:timestamp], IdentityExpression[6:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -718,7 +932,7 @@ dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9075-09-20 11:22:33.123456789 9075-03-05 11:37:26.876543211 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9209-11-11 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9210-02-18 11:22:33.123456789 9209-08-03 13:37:26.876543211 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9403-01-09 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 9403-04-18 12:22:33.123456789 9402-10-01 13:37:26.876543211 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dateval, tsval, @@ -728,7 +942,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dateval, tsval, @@ 
-739,6 +953,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -759,23 +977,61 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:timestamp], DateColSubtractTimestampColumn[2:interval_day_time], TimestampColSubtractDateColumn[3:interval_day_time], TimestampColSubtractTimestampColumn[4:interval_day_time] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:timestamp], IdentityExpression[2:interval_day_time], IdentityExpression[3:interval_day_time], IdentityExpression[4:interval_day_time] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -861,7 +1117,7 @@ dateval tsval c2 c3 c4 9075-06-13 9075-06-13 16:20:09.218517797 -0 
16:20:09.218517797 0 16:20:09.218517797 0 00:00:00.000000000 9209-11-11 9209-11-11 04:08:58.223768453 -0 04:08:58.223768453 0 04:08:58.223768453 0 00:00:00.000000000 9403-01-09 9403-01-09 18:12:33.547 -0 18:12:33.547000000 0 18:12:33.547000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -873,7 +1129,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -886,6 +1142,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -906,23 +1166,61 @@ STAGE PLANS: Select Operator expressions: tsval (type: timestamp), (tsval - 99 11:22:33.123456789) (type: timestamp), (tsval - -99 11:22:33.123456789) (type: timestamp), (tsval + 99 11:22:33.123456789) (type: timestamp), (tsval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + tsval) (type: timestamp), (99 11:22:33.123456789 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:timestamp], TimestampColSubtractIntervalDayTimeScalar[2:timestamp], TimestampColSubtractIntervalDayTimeScalar[3:timestamp], TimestampColAddIntervalDayTimeScalar[4:timestamp], TimestampColAddIntervalDayTimeScalar[5:timestamp], IntervalDayTimeScalarAddTimestampColumn[6:timestamp], IntervalDayTimeScalarAddTimestampColumn[7:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 
(type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:timestamp], IdentityExpression[2:timestamp], IdentityExpression[3:timestamp], IdentityExpression[4:timestamp], IdentityExpression[5:timestamp], IdentityExpression[6:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1012,14 +1310,14 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9075-09-21 03:42:42.341974586 9075-03-06 03:57:36.095061008 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9209-11-11 04:08:58.223768453 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9210-02-18 15:31:31.347225242 9209-08-03 17:46:25.100311664 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9403-01-09 18:12:33.547 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 9403-04-19 06:35:06.670456789 9402-10-02 07:50:00.423543211 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second from interval_arithmetic_1 limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second @@ -1027,6 +1325,10 @@ from interval_arithmetic_1 limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1044,12 +1346,28 @@ STAGE PLANS: Select Operator expressions: 109 20:30:40.246913578 (type: interval_day_time), 89 02:14:26.000000000 (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[2:interval_day_time], ConstantVectorExpression[3:interval_day_time] + vectorized: true Statistics: Num rows: 50 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE 
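The two ConstantVectorExpression values above come from folding the interval literals at compile time: 99 11:22:33.123456789 + 10 9:8:7.123456789 = 109 20:30:40.246913578, and the difference is 89 02:14:26 exactly. A minimal java.time check of that day-to-second arithmetic (illustrative only, not code from this patch):

    import java.time.Duration;

    public class IntervalFoldCheck {
        public static void main(String[] args) {
            // interval '99 11:22:33.123456789' day to second
            Duration a = Duration.ofDays(99).plusHours(11).plusMinutes(22)
                                 .plusSeconds(33).plusNanos(123_456_789L);
            // interval '10 9:8:7.123456789' day to second
            Duration b = Duration.ofDays(10).plusHours(9).plusMinutes(8)
                                 .plusSeconds(7).plusNanos(123_456_789L);
            // 109 days 20:30:40.246913578 -> prints PT2636H30M40.246913578S
            System.out.println(a.plus(b));
            // 89 days 02:14:26 exactly   -> prints PT2138H14M26S
            System.out.println(a.minus(b));
        }
    }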
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1057,6 +1375,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 0bc0e4c..3ad8632 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -136,7 +136,7 @@ POSTHOOK: Lineage: vectortab_b_1korc.si SIMPLE [(vectortab_b_1k)vectortab_b_1k.F POSTHOOK: Lineage: vectortab_b_1korc.t SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab_b_1korc.ts2 SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select v1.s, v2.s, @@ -158,7 +158,7 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select v1.s, v2.s, @@ -180,6 +180,10 @@ join on v1.intrvl1 = v2.intrvl2 and v1.s = v2.s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -198,11 +202,23 @@ STAGE PLANS: alias: vectortab_a_1korc Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean](DateColSubtractDateColumn[14:timestamp](CastTimestampToDate[13:date]))) + vectorized: true predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string], DateColSubtractDateColumn[14:timestamp](CastTimestampToDate[13:date]) + vectorized: true Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -210,6 +226,11 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: interval_day_time) 1 _col0 (type: string), _col1 (type: interval_day_time) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast 
Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -217,9 +238,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string], IdentityExpression[8:string], IdentityExpression[14:interval_day_time] + vectorized: true Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -227,25 +259,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: vectortab_b_1korc Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean](DateColSubtractDateColumn[14:timestamp](CastTimestampToDate[13:date]))) + vectorized: true predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string], DateColSubtractDateColumn[14:timestamp](CastTimestampToDate[13:date]) + vectorized: true Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: interval_day_time) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: interval_day_time) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
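The SelectColumnIsNotNull expressions in the predicateExpression above filter a whole batch at a time by rewriting the selection vector rather than testing rows one by one. A conceptual sketch of that pattern (assumed shape; Hive's actual vector expression classes differ in detail):

    public final class IsNotNullFilterSketch {
        // Compact the selection vector in place, keeping only rows whose
        // column value is present; returns the new selected-row count.
        static int filter(boolean[] isNull, int[] sel, int selSize) {
            int out = 0;
            for (int i = 0; i < selSize; i++) {
                int row = sel[i];
                if (!isNull[row]) {
                    sel[out++] = row;
                }
            }
            return out;
        }

        public static void main(String[] args) {
            boolean[] isNull = {false, true, false, true};
            int[] sel = {0, 1, 2, 3};
            int n = filter(isNull, sel, sel.length);
            // 2 rows survive: sel[0]=0, sel[1]=2
            System.out.println(n + " rows survive: sel[0]=" + sel[0] + ", sel[1]=" + sel[1]);
        }
    }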
+ allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index bb6916b..8b92ae9 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -14,7 +14,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -22,7 +22,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -30,6 +30,10 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -50,39 +54,96 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 
key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -109,14 +170,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -124,15 +204,33 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) @@ -161,7 +259,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -169,7 +267,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -177,6 +275,10 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -199,13 +301,33 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -214,16 +336,41 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: 
string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -250,14 +397,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -265,15 +431,33 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) @@ -302,7 +486,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -310,7 +494,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -318,6 +502,10 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -340,13 +528,33 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -355,28 +563,71 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -403,14 +654,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -442,7 +712,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was 
here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -453,7 +723,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -464,6 +734,10 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -485,58 +759,141 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF sum parameter expression for GROUPBY operator: UDF GenericUDFHash(Column[_col2], Column[_col3]) not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -566,14 +923,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + 
className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -581,28 +957,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 7 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -636,7 +1048,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 
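Note the split visible above: Reducer 2 falls back to row mode (notVectorizedReason names GenericUDFHash as an unsupported parameter expression), while the mergepartial GROUP BY vectorizes with VectorUDAFSumLong. The kernel of such an aggregator is a null-aware sum over one batch; a simplified sketch (assumed shape, not the class generated from VectorUDAFSum.txt):

    public final class SumLongSketch {
        // Fold one batch of bigint partial sums into the accumulator, skipping nulls.
        static long sumBatch(long[] col, boolean[] isNull, boolean noNulls, int n, long acc) {
            for (int i = 0; i < n; i++) {
                if (noNulls || !isNull[i]) {
                    acc += col[i];
                }
            }
            return acc;
        }

        public static void main(String[] args) {
            long[] col = {5L, 7L, 9L};
            boolean[] isNull = {false, true, false};
            // 5 + 9 = 14; the null in position 1 is ignored.
            System.out.println(sumBatch(col, isNull, false, col.length, 0L));
        }
    }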
348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -647,7 +1059,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -658,6 +1070,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -682,13 +1098,33 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -697,14 +1133,34 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -713,25 +1169,63 @@ STAGE PLANS: Select Operator expressions: key 
(type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -757,14 +1251,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -772,28 +1285,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -827,7 +1376,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -838,7 +1387,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain 
vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -849,6 +1398,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -873,13 +1426,33 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -888,14 +1461,34 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -904,25 +1497,63 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true 
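Each Vectorization block in these golden files reports its prerequisites as comma-separated "name IS true/false" entries under nativeConditionsMet and nativeConditionsNotMet. A hypothetical test-side helper for splitting such a line back into pairs (not part of this patch; all names here are assumptions):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public final class ConditionLineParser {
        // "No TopN IS true, Uniform Hash IS false" -> {No TopN=true, Uniform Hash=false}.
        // Naive split: an entry with a comma inside brackets, such as
        // "hive.execution.engine tez IN [tez, spark] IS true", would need a
        // smarter tokenizer than this sketch provides.
        static Map<String, Boolean> parse(String line) {
            Map<String, Boolean> conditions = new LinkedHashMap<>();
            for (String entry : line.split(",\\s*")) {
                int idx = entry.lastIndexOf(" IS ");
                if (idx > 0) {
                    String name = entry.substring(0, idx).trim();
                    boolean met = Boolean.parseBoolean(entry.substring(idx + 4).trim());
                    conditions.put(name, met);
                }
            }
            return conditions;
        }

        public static void main(String[] args) {
            System.out.println(parse("No TopN IS true, Uniform Hash IS false"));
        }
    }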
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -948,14 +1579,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ 
-963,28 +1613,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1018,7 +1704,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1029,7 +1715,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1040,6 +1726,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1064,13 +1754,33 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -1079,14 +1789,34 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -1095,25 +1825,63 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -1139,14 +1907,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1154,28 +1941,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1209,7 +2032,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1220,7 +2043,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1231,6 +2054,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1255,13 +2082,33 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + 
className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -1270,14 +2117,34 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -1286,25 +2153,63 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -1330,14 +2235,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1345,28 +2269,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 8 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out index 16603c7..c21da5f 100644 --- ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out @@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY Plan optimized by CBO. 
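A brief orientation may help when reading the vectorization annotations in the golden files above and below. The following HiveQL sketch is illustrative only and is not part of the patch: it collects the session properties that the enabledConditionsMet / nativeConditionsMet entries test, together with the EXPLAIN VECTORIZATION variants these q-files exercise. The property names and the sample queries are copied verbatim from the output above; the specific assignments are assumptions for demonstration (the q-files toggle them per test case), and the referenced tables (char_tbl1, char_tbl2, tjoin1, tjoin2) exist only in the test schema.

-- Illustrative sketch (assumptions noted above; not part of the patch).
-- Session properties that the plan annotations check:
set hive.execution.engine=tez;                             -- "hive.execution.engine tez IN [tez, spark] IS true"
set hive.vectorized.execution.enabled=true;                -- gates "PLAN VECTORIZATION: enabled"
set hive.vectorized.execution.reduce.enabled=true;         -- gates "Reduce Vectorization: enabled"
set hive.vectorized.execution.reducesink.new.enabled=true; -- allows the VectorReduceSink* operators
set hive.vectorized.execution.mapjoin.native.enabled=true; -- native VectorMapJoinOuterLongOperator instead of VectorMapJoinOuterFilteredOperator
set hive.vectorized.use.vectorized.input.format=true;      -- Map Vectorization over OrcInputFormat

-- EXPLAIN variants used by the updated q-files (queries verbatim from the output):
explain vectorization
select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa
from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);

explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2
from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );

The detail form adds the per-operator blocks (Select Vectorization, Reduce Sink Vectorization, Group By Vectorization, and so on) with their selectExpressions, while the "only summary" form, used later in vector_leftsemi_mapjoin.q.out, suppresses the rest of the plan and prints just the PLAN VECTORIZATION verdict with its enabled conditions.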
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index b9ffa34..25066be 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,6 +72,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -84,6 +96,14 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -100,8 +120,23 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 1163d24..f3d6647 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -80,12 +80,16 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: 
query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,12 +174,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -260,12 +268,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -286,6 +298,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -296,6 +314,12 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -303,9 +327,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS 
true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -313,6 +348,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -321,15 +364,34 @@ STAGE PLANS: Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int], IdentityExpression[2:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -350,12 +412,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -376,6 +442,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], 
IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -386,6 +458,12 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -393,9 +471,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -403,6 +492,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -411,15 +508,34 @@ STAGE PLANS: Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int], IdentityExpression[2:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: 
Stage-0 Fetch Operator @@ -440,12 +556,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -466,6 +586,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -476,6 +602,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -483,9 +614,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -493,6 +635,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -501,15 +651,34 @@ STAGE PLANS: Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int], IdentityExpression[2:char(2)] + vectorized: true Statistics: Num rows: 4 Data 
size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -530,12 +699,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -556,6 +729,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -566,6 +745,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -573,9 +757,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator 
+ native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -583,6 +778,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -591,15 +794,34 @@ STAGE PLANS: Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:int], IdentityExpression[2:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 69911f5..75f3b34 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -132,91 +132,17 @@ POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PREHOOK: query: explain vectorization only summary -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 
0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -234,91 +160,15 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data 
size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -338,91 +188,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key 
(type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -434,100 +208,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE 
Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 +PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 #### A masked pattern was here #### POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY @@ -545,91 +239,15 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - 
Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -644,95 +262,15 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 
depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -747,95 +285,15 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain 
vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -847,91 +305,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: 
query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -948,91 +330,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b 
on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -1063,215 +369,39 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by 
a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 #### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: 
QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -1299,495 +429,57 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 
11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: 
NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - 
TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), 
Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = 
b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -1799,18 +491,6 @@ POSTHOOK: Input: default@t3 0 0 0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 10 10 10 @@ -1819,116 +499,23 @@ POSTHOOK: Input: default@t3 4 8 8 -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -1956,120 +543,78 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-1 - Tez +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi 
join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -2097,122 +642,30 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + 
- Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -2240,133 +693,88 @@ POSTHOOK: Input: default@t3 10 10 10 -10 -10 -10 -10 -10 -10 -10 -10 -2 4 4 -5 -5 -5 8 8 -9 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +NULL +NULL +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort 
order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -2415,83 +823,15 @@ POSTHOOK: Input: 
default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -2503,10 +843,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left 
semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2605,10 +951,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2709,10 +1061,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2805,10 +1163,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2916,10 +1280,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: 
[hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3015,10 +1385,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3118,10 +1494,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3218,10 +1600,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3319,10 +1707,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization 
summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3434,10 +1828,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3534,10 +1934,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3670,10 +2076,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3780,10 +2192,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key 
= b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3919,10 +2337,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4052,10 +2476,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4193,10 +2623,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4334,10 +2770,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b 
on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4477,10 +2919,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4631,10 +3079,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4786,10 +3240,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4874,10 +3334,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4885,80 +3351,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -4976,10 +3458,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4987,80 +3475,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) 
-#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + 
selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5080,10 +3584,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5091,80 +3601,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE 
- Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5176,10 +3702,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5187,84 +3719,102 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: 
- 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[0:int] + 
native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -5287,10 +3837,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5298,80 +3854,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator 
- condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterStringGroupColLessStringScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + 
className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -5386,10 +3958,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5397,84 +3975,102 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE 
- Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -5489,10 +4085,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5500,84 +4102,102 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map 
Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterStringGroupColLessEqualStringScalar[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: 
false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -5589,10 +4209,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY 
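Throughout these golden plans the map join vectorizes but is not native: its nativeConditionsNotMet reports only hive.vectorized.execution.mapjoin.native.enabled IS false, and the final sort-by reduce sinks fall back to the generic VectorReduceSinkOperator because Uniform Hash IS false. A minimal HiveQL session sketch of the knobs involved, assuming the same t1/t2 test tables; the set commands and their values are assumptions about the test session, not part of the recorded output:

-- Sketch under an assumed session state; the property names are taken
-- verbatim from the conditions printed in the plans above.
set hive.vectorized.execution.enabled=true;                 -- gates "PLAN VECTORIZATION: enabled"
set hive.vectorized.execution.reducesink.new.enabled=true;  -- already met in every Reduce Sink above
set hive.vectorized.execution.mapjoin.native.enabled=true;  -- the single unmet MapJoin condition above
explain vectorization only detail
select * from t2 a left semi join (select key , value from t1 where key > 2) b
on a.key = b.key sort by a.key, a.value;

Uniform Hash, by contrast, is a property of the sink itself rather than a settable flag: in these plans only the reduce sinks that carry Map-reduce partition columns meet it, while the partition-less sort-by sinks report it unmet under the same configuration.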
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5600,80 +4226,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map 
Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5690,10 +4332,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5701,80 +4349,96 @@ STAGE DEPENDENCIES: STAGE PLANS: 
Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group 
By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -5805,91 +4469,114 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map 
Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](LongScalarMultiplyLongColumn[2:long]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS 
false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn[1:long] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -5905,10 +4592,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5916,104 +4609,123 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not 
null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -6041,10 +4753,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6052,80 +4770,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -6151,10 +4885,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6162,107 +4902,130 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select 
Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6290,10 +5053,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on 
a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6301,89 +5070,99 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6423,10 +5202,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: 
query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6434,94 +5219,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data 
size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6564,10 +5348,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: 
query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6575,94 +5365,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID 
UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6705,105 +5494,110 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: 
query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink 
Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: 
query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6848,10 +5642,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6859,94 +5659,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + 
className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key 
from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -7002,10 +5801,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7013,102 +5818,117 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -7157,10 +5977,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7168,72 +5994,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: 
Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No 
TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -7245,10 +6079,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7256,80 +6096,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + 
vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] 
+ vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -7347,10 +6203,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7358,80 +6220,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY 
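For reference, the condition names printed in these plans correspond one-to-one to Hive session settings. Below is a minimal, illustrative sketch of a session that would produce a plan of this shape; the setting names are copied verbatim from the output above, their defaults vary by Hive version, and the t1/t2 test tables are assumed to already exist.

  -- Sketch only: mirrors the conditions reported in the plans above.
  set hive.execution.engine=tez;                              -- "hive.execution.engine tez IN [tez, spark] IS true"
  set hive.vectorized.execution.enabled=true;                 -- "PLAN VECTORIZATION: enabled"
  set hive.vectorized.execution.reduce.enabled=true;          -- "Reduce Vectorization: enabled"
  set hive.vectorized.execution.reducesink.new.enabled=true;  -- permits the native VectorReduceSink* operators
  set hive.vectorized.execution.mapjoin.native.enabled=false; -- why Map Join Vectorization reports "native: false"
  explain vectorization only detail
  select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;

With the last setting flipped to true, the map join would presumably satisfy its one remaining native condition and report native: true with a specialized VectorMapJoin*Operator class, rather than the generic VectorMapJoinOperator shown in these plans.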
@@ -7451,10 +6329,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7462,80 +6346,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -7547,10 +6447,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = 
a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7558,84 +6464,102 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + 
native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -7658,10 +6582,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain 
select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7669,80 +6599,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterStringGroupColLessStringScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -7757,10 +6703,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select 
a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7768,84 +6720,102 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) 
- 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -7860,10 +6830,16 @@ POSTHOOK: 
Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7871,84 +6847,102 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterStringGroupColLessEqualStringScalar[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -7960,10 +6954,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -7971,80 +6971,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data 
size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], 
IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -8061,10 +7077,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8072,80 +7094,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = 
b.key sort by a.key PREHOOK: type: QUERY @@ -8176,10 +7214,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8187,80 +7231,97 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: 
_col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](LongScalarMultiplyLongColumn[2:long]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn[1:long] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -8276,10 +7337,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8
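Reading the detail output above: every operator reports vectorized: true, yet two conditions keep this plan off the fully native path. The map join falls back to the row-mode pass-through VectorMapJoinOperator because the plan lists "hive.vectorized.execution.mapjoin.native.enabled IS false" under nativeConditionsNotMet, and Map 1's shuffle stays on the generic VectorReduceSinkOperator with "Uniform Hash IS false". A minimal sketch for probing this by hand, assuming the same ORC-backed t1/t2 tables this q-file sets up earlier; the setting names are copied verbatim from the conditions in the plan:

    -- hypothetical ad-hoc session, not part of this q-file
    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.reducesink.new.enabled=true;
    -- reported under nativeConditionsNotMet above; flipping it would exercise
    -- the native VectorMapJoin* operators instead of the pass-through one
    set hive.vectorized.execution.mapjoin.native.enabled=true;
    explain vectorization only detail
    select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;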
-PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8287,104 +7354,123 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -8412,10 +7498,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8423,80 +7515,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - 
Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: 
false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -8522,10 +7630,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9
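The plan above also suggests that the reduce-sink class chosen for the shuffle tracks the shape of the key: the single-int semijoin keys earlier in this file bind VectorReduceSinkLongOperator, while the two-column (int, string) semijoin key here binds VectorReduceSinkMultiKeyOperator. A quick way to see the two side by side, as a hypothetical sketch (the single-key variant is illustrative and not part of this q-file):

    explain vectorization only detail
    select * from t3 a left semi join t1 b on a.key = b.key sort by a.key;            -- long key
    explain vectorization only detail
    select * from t3 a left semi join t1 b on a.key = b.key and a.value = b.value
    sort by a.key, a.value;                                                           -- multi-key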
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8533,107 +7647,130 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet:
Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -8661,10 +7798,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8672,89 +7815,99 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -8794,10 +7947,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8805,94 +7964,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: 
Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -8935,10 +8093,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8946,94 +8110,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -9076,10 +8239,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9087,94 +8256,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE 
Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -9219,10 +8387,16 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9230,94 +8404,93 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4
             Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 5
             Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Outer Join 0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
        Reducer 3
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -9373,10 +8546,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9384,102 +8563,117 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 value (type: string)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+            Map Operator Tree:
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 4
            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: value (type: string)
-                    sort order: +
-                    Map-reduce partition columns: value (type: string)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
@@ -9528,10 +8722,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9539,72 +8739,80 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > 100) and value is not null) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Semi Join 0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean])
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                      vectorized: true
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 2
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: string)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[1:string]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[1:string]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkStringOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
@@ -9616,10 +8824,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9627,80 +8841,95 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
  Stage: Stage-0
    Fetch Operator
-      limit: -1
      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9718,10 +8947,16 @@ POSTHOOK: Input: default@t2
 10 val_10
 4 val_4
 8 val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9729,80 +8964,95 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
  Stage: Stage-0
    Fetch Operator
-      limit: -1
      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9822,10 +9072,16 @@ POSTHOOK: Input: default@t2
 10 val_5
 4 val_2
 8 val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9833,80 +9089,95 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 3
+            Map Operator Tree:
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
  Stage: Stage-0
    Fetch Operator
-      limit: -1
      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9918,10 +9189,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -9929,84 +9206,101 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col1 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[1:string]
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 15) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col1
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col1 (type: int), _col1 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col1 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col1 (type: int)
-                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterLongColLessLongScalar[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int], IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
  Stage: Stage-0
    Fetch Operator
-      limit: -1
      Processor Tree:
-        ListSink
 PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
@@ -10029,10 +9323,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -10040,80 +9340,95 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 3
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((value < 'val_10') and key is not null) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterExprAndExpr[-1:boolean](FilterStringGroupColLessStringScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean])
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
  Stage: Stage-0
    Fetch Operator
-      limit: -1
      Processor Tree:
-        ListSink
 PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -10128,10 +9443,16 @@ POSTHOOK: Input: default@t2
 0 val_0
 0 val_0
 0 val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -10139,84 +9460,101 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: t3
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > 5) (type: boolean)
-                    Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterLongColGreaterLongScalar[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Map 2
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 1
-                      Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          sort order: +
-                          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[1:string]
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 3
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
  Stage: Stage-0
    Fetch Operator
-      limit: -1
      Processor Tree:
-        ListSink
 PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
@@ -10231,10 +9569,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -10242,84 +9586,101 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
-                TableScan
-                  alias: t2
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterStringGroupColLessEqualStringScalar[-1:boolean])
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      native:
false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 
12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -10331,10 +9692,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10342,80 +9709,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + 
selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -10432,10 +9814,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10443,80 +9831,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -10547,10 +9950,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10558,80 +9967,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: 
true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](LongScalarMultiplyLongColumn[2:long]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn[1:long] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + 
nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -10647,10 +10072,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10658,104 +10089,123 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -10783,10 +10233,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10794,80 +10250,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here 
#### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -10893,10 +10364,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10904,107 +10381,130 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: 
VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11032,10 +10532,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11043,89 +10549,99 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false + vectorized: true + 
Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11165,105 +10681,110 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 
1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: 
- 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11306,10 +10827,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11317,94 +10844,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE 
or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - 
keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11447,10 +10973,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11458,94 +10990,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE 
or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - 
keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11590,10 +11121,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11601,94 +11138,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data 
size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vertices: + Map 1 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: 
true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11744,10 +11280,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11755,102 +11297,115 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: 
COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + 
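The Select/Group By pair annotated in this vertex is how the planner deduplicates the semi-join build side: the left semi join on b is rewritten as a hash group-by over b's key before the broadcast. Run stand-alone over the same t2 table, the sketch below should carry the same annotations (VectorGroupByOperator vectorizes its output but is not native, hence "Supported IS false" under nativeConditionsNotMet):

    -- sketch: the dedup Map 3 effectively computes for the semi-join build side
    explain vectorization only detail
    select key from t2 where key is not null group by key;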
Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -11899,10 +11454,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left 
semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11910,72 +11471,79 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 
Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -11987,10 +11555,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -11998,80 +11572,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 
to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, 
Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12089,91 +11678,112 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: 
NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12193,10 +11803,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12204,80 +11820,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true 
+ predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) 
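
A note on the shape these semi-join plans share: the small (build) side always passes through a hash-mode Group By on the join key before the broadcast edge, as in the Map 3 vertex above, because a LEFT SEMI JOIN needs only the distinct build keys for a membership test; duplicate build rows must not multiply probe-side output. The sketch below captures just that semantics, with hypothetical helper types written for this note rather than Hive's operators.

// Illustrative sketch only: LEFT SEMI JOIN as a set-membership test.
// Deduplicating the build keys first mirrors the hash Group By that the
// plans insert on the small side. Hypothetical code, not Hive's.
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

final class SemiJoinSketch {
  // probeRows: big-side rows with the join key at index 0;
  // buildKeys: small-side join keys, possibly containing duplicates.
  static List<long[]> leftSemiJoin(List<long[]> probeRows, List<Long> buildKeys) {
    Set<Long> distinctKeys = new HashSet<>(buildKeys); // the "Group By (hash)" step
    List<long[]> out = new ArrayList<>();
    for (long[] row : probeRows) {
      if (distinctKeys.contains(row[0])) { // each probe row emitted at most once
        out.add(row);
      }
    }
    return out;
  }
}

This is also why the output can mark the build side's Group By Vectorization as native: false while still vectorized: true: the hash-mode Group By runs vectorized but is not one of the fully native operators in this explain format.
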
- outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12289,10 +11920,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12300,84 +11937,101 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + 
className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - 
Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -12400,10 +12054,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12411,80 +12071,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterStringGroupColLessStringScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -12499,10 +12174,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12510,84 +12191,101 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null 
(type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -12602,10 +12300,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12613,84 +12317,101 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce 
partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterStringGroupColLessEqualStringScalar[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -12702,10 +12423,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12713,80 +12440,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - 
Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColGreaterLongScalar[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12803,10 +12545,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -12814,80 +12562,95 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - 
Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -12918,91 +12681,113 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](LongScalarMultiplyLongColumn[2:long]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: LongScalarMultiplyLongColumn[1:long] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -13018,10 +12803,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13029,104 +12820,123 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 
(type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: int), _col3 (type: string)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      nativeConditionsNotMet: One MapJoin Condition IS false
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string]
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: value (type: string)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4 
             Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -13154,10 +12964,16 @@ POSTHOOK: Input: default@t3
 10 val_10 10 val_5
 4 val_4 4 val_2
 8 val_8 8 val_4
-PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -13165,80 +12981,95 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      keys:
-                        0 key (type: int), value (type: string)
-                        1 _col0 (type: int), _col1 (type: string)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean])
+                      vectorized: true
+                  Map Join Vectorization:
+                      className: VectorMapJoinLeftSemiMultiKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean])
+                      vectorized: true
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      vectorized: true
+                  Group By Vectorization:
+                      className: VectorGroupByOperator
+                      vectorOutput: true
+                      keyExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkMultiKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      vectorized: true
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string]
+                  vectorized: true
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -13264,10 +13095,16 @@ POSTHOOK: Input: default@t3
 5 val_5
 8 val_8
 9 val_9
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -13275,107 +13112,130 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
-#### A masked pattern was here ####
       Edges:
         Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
       Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                           Left Semi Join 0 to 2
-                      keys:
-                        0 key (type: int)
-                        1 _col0 (type: int)
-                        2 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                        2 Map 4
-                      Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 11 Data size: 1023 Basic
stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 1 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13403,10 +13263,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13414,89 +13280,99 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### 
Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: One MapJoin Condition IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13536,10 +13412,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13547,94 +13429,93 
@@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: 
Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13677,10 +13558,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13688,94 
+13575,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution 
mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13818,105 +13704,110 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - 
alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13961,10 +13852,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi 
join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13972,94 +13869,93 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: - TableScan - alias: c - 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -14115,10 +14011,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from 
t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -14126,102 +14028,115 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not 
null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -14270,10 +14185,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -14281,72 +14202,79 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true + Map Join 
Vectorization: + className: VectorMapJoinLeftSemiStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string] + vectorized: true + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index a075662..1647dcc 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -4,7 
+4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -65,6 +69,10 @@ STAGE PLANS: value expressions: _col2 (type: int) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -89,6 +97,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -109,10 +121,28 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -123,6 +153,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true outputColumnNames: _col2, _col4 input vertices: 0 Map 1 @@ -130,9 +165,20 @@ STAGE PLANS: Select Operator expressions: _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int] + vectorized: true Statistics: 
Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -170,19 +216,23 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -226,6 +276,10 @@ STAGE PLANS: value expressions: _col2 (type: int) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 2 Map Operator Tree: TableScan @@ -250,6 +304,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 3 Map Operator Tree: TableScan @@ -270,10 +328,28 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -284,6 +360,11 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash 
Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + vectorized: true outputColumnNames: _col2, _col4 input vertices: 0 Map 1 @@ -291,9 +372,20 @@ STAGE PLANS: Select Operator expressions: _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int] + vectorized: true Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out index b6a3b9a..d7ebd2b 100644 --- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out @@ -211,7 +211,7 @@ POSTHOOK: Output: default@store PREHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -226,7 +226,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -238,6 +238,10 @@ explain select order by s_state limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -273,6 +277,12 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: unknown + Map Vectorization: + enabled: true + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -293,6 +303,14 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -312,6 +330,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -359,6 +385,13 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -374,6 +407,13 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Reducer 5 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out index 469c702..d537297 100644 --- ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out @@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc1 POSTHOOK: Output: default@orc1 -PREHOOK: query: explain from orc1 a +PREHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 PREHOOK: type: QUERY -POSTHOOK: query: explain from orc1 a +POSTHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -142,6 +146,14 @@ STAGE PLANS: name: default.orc_rn3 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-4 Dependency Collection diff --git ql/src/test/results/clientpositive/llap/vector_null_projection.q.out ql/src/test/results/clientpositive/llap/vector_null_projection.q.out index 4bfe41a..5f9df93 100644 --- ql/src/test/results/clientpositive/llap/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_null_projection.q.out @@ -37,13 +37,17 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@b POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. -explain +explain vectorization detail select NULL from a PREHOOK: type: QUERY POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. 
-explain +explain vectorization detail select NULL from a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,6 +75,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -87,12 +97,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@a #### A masked pattern was here #### NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select NULL as x from a union distinct select NULL as x from b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select NULL as x from a union distinct select NULL as x from b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,6 +142,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported + vectorized: false Map 4 Map Operator Tree: TableScan @@ -149,8 +169,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported + vectorized: false Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: void) diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 06e30d8..7241bc6 100644 --- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain 
select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -95,6 +99,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -108,6 +120,14 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -134,10 +154,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -184,6 +208,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -200,6 +232,14 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -216,6 +256,14 @@ STAGE PLANS: value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -233,10 +281,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from 
myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -281,6 +333,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -294,6 +354,14 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -307,6 +375,14 @@ STAGE PLANS: value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -351,10 +427,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -402,6 +482,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -417,6 +505,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data 
size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -432,6 +528,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -449,10 +553,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -497,6 +605,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -509,6 +625,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -521,6 +645,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -644,11 +776,15 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a 
join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -690,6 +826,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -703,6 +847,14 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -729,10 +881,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -779,6 +935,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -795,6 +959,14 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -811,6 +983,14 @@ STAGE PLANS: value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -828,10 +1008,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was 
here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -876,6 +1060,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -889,6 +1081,14 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -902,6 +1102,14 @@ STAGE PLANS: value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -946,10 +1154,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -997,6 +1209,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true 
Map 2 Map Operator Tree: TableScan @@ -1012,6 +1232,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -1027,6 +1255,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1044,10 +1280,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1092,6 +1332,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -1104,6 +1352,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -1116,6 +1372,14 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_nvl.q.out 
ql/src/test/results/clientpositive/llap/vector_nvl.q.out index b926ab4b..05c8563 100644 --- ql/src/test/results/clientpositive/llap/vector_nvl.q.out +++ ql/src/test/results/clientpositive/llap/vector_nvl.q.out @@ -1,13 +1,17 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -51,14 +55,18 @@ NULL 100.0 NULL 100.0 NULL 100.0 NULL 100.0 -PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -98,14 +106,18 @@ NULL 1.0 27.0 27.0 -11.0 -11.0 61.0 61.0 -PREHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -145,14 +157,18 @@ POSTHOOK: Input: default@alltypesorc 10 10 10 -PREHOOK: query: EXPLAIN SELECT nvl(null, null) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, null) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 503cf5b..5ff989e 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select bo, max(b) from vectortab2korc group by bo order by bo 
desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,9 +132,23 @@ STAGE PLANS: Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[7:boolean], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[7:boolean] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -139,15 +157,43 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:boolean] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -155,17 +201,41 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:bigint] + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 6edcbeb..f8fbada 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -75,62 +79,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Select Operator - 
expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -152,12 +123,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -165,62 +140,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, 
llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index cda039f..53cdacd 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -216,18 +216,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -235,62 +239,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - input vertices: - 1 Map 2 - Statistics: Num rows: 32 Data size: 19648 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 32 Data size: 19648 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true 
+ groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -329,18 +300,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -348,61 +323,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint 
(type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 112 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 112 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -527,7 +470,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -536,7 +479,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -545,6 +488,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -552,105 +499,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer 
Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 32 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 240 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), sum(_col0) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index 051911b..357ca68 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,6 +244,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -251,105 +255,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cbigint (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 57 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 162 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out index 5729237..863acdb 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 
false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,112 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 40 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 80 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: 
mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":[{"parent":"Map 3","type":"BROADCAST_EDGE"},{"parent":"Map 4","type":"BROADCAST_EDGE"}],"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 4":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -375,7 +270,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -384,7 +279,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -393,112 +288,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 2939 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 20 Data size: 2939 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 21 Data size: 1869 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":[{"parent":"Map 3","type":"BROADCAST_EDGE"},{"parent":"Map 4","type":"BROADCAST_EDGE"}],"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 
3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 4":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -524,7 +314,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -533,7 +323,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -542,112 +332,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 2120 Basic 
stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 20 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":[{"parent":"Map 3","type":"BROADCAST_EDGE"},{"parent":"Map 4","type":"BROADCAST_EDGE"}],"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 4":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: -- 
SORT_QUERY_RESULTS select count(*) from (select c.cstring1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out index 75d783f..985d4bd 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out @@ -246,82 +246,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - input vertices: - 1 Map 2 - Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10, _col11 - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 30 Data size: 7167 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 2","type":"BROADCAST_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 2":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select * @@ -394,81 +331,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - 
condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 2","type":"BROADCAST_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 2":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select c.ctinyint @@ -897,7 +772,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -906,7 +781,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -915,112 +790,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 168 
Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 81 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 4 - Statistics: Num rows: 1215 Data size: 9720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":[{"parent":"Map 3","type":"BROADCAST_EDGE"},{"parent":"Map 4","type":"BROADCAST_EDGE"}],"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map 
Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 4":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.ctinyint diff --git ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out index 8a18738..28bb806 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out @@ -66,100 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort 
order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -179,103 +100,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: 
COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -295,103 +134,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6058 -PREHOOK: query: explain 
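From this point the q.out expectations record the new "explain vectorization only formatted" output: the old multi-line text plan is replaced by a single-line JSON object per query. Below is a minimal sketch of consuming that shape -- assuming only the single-line structure captured in these files, and using org.json, the library ExplainTask's formatted output is built with; the class name is hypothetical. Note the trailing colons inside keys such as "Vertices:" and "Map Vectorization:" are verbatim in the output and must be matched exactly.

import org.json.JSONObject;

public class VectorizationPlanReader {
  // Walks one captured plan string, e.g. the +{"PLAN VECTORIZATION":...} lines below.
  public static void printVertexSummaries(String formattedPlan) {
    JSONObject plan = new JSONObject(formattedPlan);
    JSONObject stagePlans = plan.getJSONObject("STAGE PLANS");
    for (String stage : JSONObject.getNames(stagePlans)) {
      JSONObject stagePlan = stagePlans.getJSONObject(stage);
      if (!stagePlan.has("Tez")) {
        continue; // "Stage-0":{} carries no vectorization data
      }
      JSONObject vertices = stagePlan.getJSONObject("Tez").getJSONObject("Vertices:");
      for (String vertexName : JSONObject.getNames(vertices)) {
        JSONObject vertex = vertices.getJSONObject(vertexName);
        // Map vertices report under "Map Vectorization:", reducers under "Reduce Vectorization:";
        // a non-vectorized vertex (e.g. "Reducer 2":{} in the three-way join plans) has neither
        // key and is skipped.
        for (String key : new String[] { "Map Vectorization:", "Reduce Vectorization:" }) {
          if (vertex.has(key)) {
            JSONObject summary = vertex.getJSONObject(key);
            System.out.println(vertexName + ": vectorized=" + summary.getString("vectorized:")
                + ", allNative=" + summary.getString("allNative:"));
          }
        }
      }
    }
  }
}

Run against the first plan below, this prints Map 1 with allNative=false and Map 3 with allNative=true, matching the summaries as captured.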
+PREHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map 
Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -411,103 +168,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6248 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 3 - Statistics: Num rows: 6444 Data size: 51552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: 
vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -527,7 +202,7 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -536,7 +211,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -545,119 +220,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - 
condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col0 pmod 8) (type: bigint) - sort order: + - Map-reduce partition columns: (_col0 pmod 8) (type: bigint) - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 9760469 Data size: 78083752 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 4","type":"BROADCAST_EDGE"},"Reducer 2":[{"parent":"Map 1","type":"SIMPLE_EDGE"},{"parent":"Map 5","type":"SIMPLE_EDGE"}],"Reducer 3":{"parent":"Reducer 2","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map 
Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 4":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 5":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{},"Reducer 3":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -743,100 +306,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: st - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -856,103 +340,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 
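Throughout these summaries, the enabledConditionsMet / enableConditionsMet entries name the configuration gates behind each verdict: Map vertices check hive.vectorized.use.vectorized.input.format, reducers check hive.vectorized.execution.reduce.enabled together with the execution engine being tez or spark, and the plan as a whole checks hive.vectorized.execution.enabled. The sketch below reads those three keys (taken verbatim from the captured output) outside of Hive; the class name is hypothetical and the snippet is illustrative only.

import org.apache.hadoop.hive.conf.HiveConf;

public class VectorizationGateCheck {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Gate for the PLAN VECTORIZATION section as a whole.
    System.out.println("plan enabled: "
        + conf.getBoolean("hive.vectorized.execution.enabled", false));
    // Gate reported by every Map vertex above.
    System.out.println("map input format gate: "
        + conf.getBoolean("hive.vectorized.use.vectorized.input.format", false));
    // Gates reported by every Reducer vertex above.
    System.out.println("reduce enabled: "
        + conf.getBoolean("hive.vectorized.execution.reduce.enabled", false));
    System.out.println("engine: " + conf.get("hive.execution.engine"));
  }
}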
- Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -972,103 +374,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked 
pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 
1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1088,103 +408,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 40386 Data size: 323088 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 396 Basic 
stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 3","type":"BROADCAST_EDGE"},"Reducer 2":{"parent":"Map 1","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1204,7 +442,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1213,7 +451,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1222,119 +460,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 
Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 151450 Data size: 605800 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col0 pmod 8) (type: bigint) - sort order: + - Map-reduce partition columns: (_col0 pmod 8) (type: bigint) - Statistics: Num rows: 100 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6058 Data size: 24232 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 152914016 Data size: 1223312128 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":{"parent":"Map 4","type":"BROADCAST_EDGE"},"Reducer 2":[{"parent":"Map 1","type":"SIMPLE_EDGE"},{"parent":"Map 5","type":"SIMPLE_EDGE"}],"Reducer 3":{"parent":"Reducer 
2","type":"SIMPLE_EDGE"}},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 4":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 5":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Reducer 2":{},"Reducer 3":{"Reduce Vectorization:":{"enabled:":"true","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true","hive.execution.engine tez IN [tez, spark] IS true"],"groupByVectorOutput:":"true","allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out index 9369661..a069df3 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out @@ -126,113 +126,15 @@ POSTHOOK: Output: default@TJOIN4 POSTHOOK: Lineage: tjoin4.c1 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin4.c2 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin4.rnum SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 4 Data 
size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":[{"parent":"Map 2","type":"BROADCAST_EDGE"},{"parent":"Map 3","type":"BROADCAST_EDGE"}]},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 2":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format 
IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -251,108 +153,15 @@ POSTHOOK: Input: default@tjoin3 0 3 0 1 NULL NULL 2 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 2 - Map Operator Tree: - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator 
Tree: - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Tez":{"Edges:":{"Map 1":[{"parent":"Map 2","type":"BROADCAST_EDGE"},{"parent":"Map 3","type":"BROADCAST_EDGE"}]},"Vertices:":{"Map 1":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 2":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}},"Map 3":{"Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"true","usesVectorUDFAdaptor:":"false","vectorized:":"true"}}}}},"Stage-0":{}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index ebe895f..673133c 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -76,12 +76,16 @@ POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(invent POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: 
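From here the expectations switch to "explain vectorization detail", which keeps the classic indented text plan and interleaves per-operator annotations (Select Vectorization, Group By Vectorization, Reduce Sink Vectorization, File Sink Vectorization) with the vertex-level Map/Reduce Vectorization summaries. A minimal sketch follows, assuming only the indented text layout visible below and a hypothetical class name, that pairs each operator annotation with the native flag on a following line.

import java.util.Scanner;

public class DetailNativeFlagScanner {
  public static void listNativeFlags(String textPlan) {
    String currentAnnotation = null;
    try (Scanner lines = new Scanner(textPlan)) {
      while (lines.hasNextLine()) {
        String line = lines.nextLine().trim();
        if (line.endsWith("Vectorization:")) {
          currentAnnotation = line; // e.g. "Select Vectorization:" or "Reduce Sink Vectorization:"
        } else if (currentAnnotation != null && line.startsWith("native:")) {
          // Vertex-level summaries report allNative, which does not match startsWith("native:"),
          // so only the per-operator annotations are paired here.
          System.out.println(currentAnnotation + " -> " + line);
          currentAnnotation = null;
        }
      }
    }
  }
}

On the first detail plan below this reports, among others, "Select Vectorization: -> native: true" and "Reduce Sink Vectorization: -> native: false" (the reduce sink misses the Uniform Hash condition, as its nativeConditionsNotMet line shows).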
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -102,28 +106,74 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -204,12 +254,16 @@ POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(in POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select 
sum(inv_quantity_on_hand) from inventory_part_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -230,28 +284,74 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -334,12 +434,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2a POSTHOOK: Input: default@inventory_part_2a@par=2 POSTHOOK: Output: default@inventory_part_2a@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2a PREHOOK: type: QUERY -POSTHOOK: 
query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -360,28 +464,74 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -445,12 +595,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2b POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select 
sum(inv_quantity_on_hand) from inventory_part_2b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -471,28 +625,74 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -562,12 +762,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_3 POSTHOOK: Input: default@inventory_part_3@par=2 POSTHOOK: Output: default@inventory_part_3@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_3 POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -588,28 +792,74 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 9eeb0d6..e5c2370 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc sort by fl_num, fl_date limit 25 
PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -283,43 +287,110 @@ STAGE PLANS: Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], IdentityExpression[2:date], IdentityExpression[3:timestamp], IdentityExpression[4:float], IdentityExpression[5:int] + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], IdentityExpression[3:string], IdentityExpression[1:date], IdentityExpression[4:timestamp], IdentityExpression[5:float], IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], IdentityExpression[3:string], IdentityExpression[1:date], IdentityExpression[4:timestamp], IdentityExpression[5:float], IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -365,12 +436,16 @@ Chicago New York 2010-10-24 2010-10-24 07:00:00 113.0 897 Chicago New York 2010-10-25 2010-10-25 07:00:00 -1.0 897 Chicago New York 2010-10-26 2010-10-26 07:00:00 0.0 897 Chicago New York 2010-10-27 2010-10-27 07:00:00 -11.0 897 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -391,9 +466,23 @@ STAGE PLANS: Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:date] + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[2:date] + native: false 
+ nativeConditionsNotMet: Supported IS false + vectorized: true keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -402,21 +491,54 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:date] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 19584 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 68 Data size: 19584 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -828,12 +950,16 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1018,12 +1144,16 @@ Chicago New York 2010-10-31 07:00:00 -4.0 1531 2010-10-31 Chicago New York 2010-10-31 07:00:00 -22.0 1610 2010-10-31 Chicago New York 2010-10-31 07:00:00 -15.0 3198 2010-10-31 Washington New York 2010-10-31 07:00:00 -18.0 7282 2010-10-31 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1045,43 +1175,110 @@ STAGE PLANS: Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], IdentityExpression[2:timestamp], IdentityExpression[3:float], IdentityExpression[4:int], IdentityExpression[5:date] + vectorized: true Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], IdentityExpression[3:string], IdentityExpression[4:timestamp], IdentityExpression[5:float], IdentityExpression[0:int], IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], IdentityExpression[3:string], IdentityExpression[4:timestamp], IdentityExpression[5:float], IdentityExpression[0:int], IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1151,12 +1348,16 @@ Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 Chicago New York 2010-10-25 07:00:00 -1.0 897 2010-10-25 Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1177,9 +1378,23 @@ STAGE PLANS: Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:date] + vectorized: true Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[5:date] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -1188,21 +1403,54 @@ STAGE PLANS: key 
expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:date] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1638,12 +1886,16 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- 2010-10-29 07:00:00 12 2010-10-30 07:00:00 11 2010-10-31 07:00:00 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1828,12 +2080,16 @@ Chicago New York 2010-10-31 -4.0 1531 2010-10-31 07:00:00 Chicago New York 2010-10-31 -22.0 1610 2010-10-31 07:00:00 Chicago New York 2010-10-31 -15.0 3198 2010-10-31 07:00:00 Washington New York 2010-10-31 -18.0 7282 2010-10-31 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 @@ -1855,43 +2111,110 @@ STAGE PLANS: Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], IdentityExpression[2:date], IdentityExpression[3:float], IdentityExpression[4:int], IdentityExpression[5:timestamp] + vectorized: true Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], IdentityExpression[3:string], IdentityExpression[4:date], IdentityExpression[5:float], IdentityExpression[0:int], IdentityExpression[1:timestamp] + vectorized: true Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], IdentityExpression[3:string], IdentityExpression[4:date], IdentityExpression[5:float], IdentityExpression[0:int], IdentityExpression[1:timestamp] + vectorized: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 1000 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1961,12 +2284,16 @@ Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 Chicago New York 2010-10-25 -1.0 897 2010-10-25 07:00:00 Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 Chicago New York 2010-10-27 -11.0 897 2010-10-27 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1987,9 +2314,23 @@ STAGE PLANS: Select Operator expressions: fl_time (type: timestamp) outputColumnNames: fl_time + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:timestamp] + vectorized: true Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[5:timestamp] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: fl_time (type: timestamp) mode: hash outputColumnNames: _col0, _col1 @@ -1998,21 +2339,54 @@ STAGE PLANS: key expressions: _col0 (type: timestamp) sort order: + Map-reduce partition 
columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:timestamp] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce1.q.out ql/src/test/results/clientpositive/llap/vector_reduce1.q.out index 4c252c7..bafa24b 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce1.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce1.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select b from vectortab2korc order by b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select b from vectortab2korc order by b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,22 +135,60 @@ STAGE PLANS: Select Operator expressions: b (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce2.q.out ql/src/test/results/clientpositive/llap/vector_reduce2.q.out index a4ce890..f07793b 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce2.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s, i, s2 from vectortab2korc order by s, i, s2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s, i, s2 from vectortab2korc order by s, i, s2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,22 +135,60 @@ STAGE PLANS: Select Operator expressions: s (type: string), i (type: int), s2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string], IdentityExpression[2:int], IdentityExpression[9:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:int], IdentityExpression[2:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce3.q.out ql/src/test/results/clientpositive/llap/vector_reduce3.q.out index d34113c..1aae26a 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce3.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s from vectortab2korc order by s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s from vectortab2korc order by s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,22 +135,60 @@ STAGE PLANS: Select Operator expressions: s (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 9571b5b..08fec5a 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -14,20 +14,24 @@ POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, 
cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -47,10 +51,24 @@ STAGE PLANS: alias: decimal_test Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[4:long]) SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[4:long])) + vectorized: true predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean) Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cdecimal1) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(IdentityExpression[2:decimal(20,10)]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:double], IdentityExpression[2:decimal(20,10)], IdentityExpression[3:decimal(23,14)] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -59,16 +77,45 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + vectorized: true Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) + Group By Vectorization: + aggregators: 
VectorUDAFMinDecimal(IdentityExpression[4:decimal(20,10)]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:double], IdentityExpression[2:decimal(20,10)], IdentityExpression[3:decimal(23,14)] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14)) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -76,21 +123,50 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:double], IdentityExpression[2:decimal(20,10)], IdentityExpression[3:decimal(23,14)], IdentityExpression[4:decimal(20,10)] + vectorized: true Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index ca3d2fa..8a584c9 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -97,16 +97,20 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, 
comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -265,20 +269,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -300,8 +308,21 @@ STAGE PLANS: Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringGroupConcatColCol[19:String_Family](StringGroupColConcatStringScalar[17:String_Family](StringScalarConcatStringGroupCol[18:String_Family](CastLongToString[17:String](CastDoubleToLong[13:long](DoubleColAddDoubleScalar[15:double](DoubleColDivideDoubleScalar[16:double](CastLongToDouble[15:double](LongColSubtractLongScalar[14:long](VectorUDFMonthDate[13:long])))))))) CastLongToString[18:String](VectorUDFYearDate[13:long])) + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[19:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -310,14 +331,42 @@ STAGE PLANS: key expressions: 
_col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -325,20 +374,49 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff 
--git ql/src/test/results/clientpositive/llap/vector_struct_in.q.out ql/src/test/results/clientpositive/llap/vector_struct_in.q.out index 0aa1e70..9312c95 100644 --- ql/src/test/results/clientpositive/llap/vector_struct_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_struct_in.q.out @@ -22,7 +22,7 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -36,7 +36,7 @@ struct('nine','1'), struct('ten','1') ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -92,7 +92,7 @@ POSTHOOK: Input: default@test_1 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -106,7 +106,7 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -180,7 +180,7 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -194,7 +194,7 @@ struct(9,1), struct(10,1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -250,7 +250,7 @@ POSTHOOK: Input: default@test_2 #### A masked pattern was here #### 1 1 7 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -264,7 +264,7 @@ struct(9,1), struct(10,1) ) as b from test_2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -338,7 +338,7 @@ POSTHOOK: Input: default@values__tmp__table__3 POSTHOOK: Output: default@test_3 POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -352,7 +352,7 @@ struct('nine',1), struct('ten',1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -408,7 +408,7 @@ POSTHOOK: Input: default@test_3 #### A 
masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -422,7 +422,7 @@ struct('nine',1), struct('ten',1) ) as b from test_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -497,7 +497,7 @@ POSTHOOK: Output: default@test_4 POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -512,7 +512,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -570,7 +570,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_4 #### A masked pattern was here #### 1 a 0.5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -585,7 +585,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), diff --git ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out index 7d14256..b5b5504 100644 --- ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -146,9 +150,20 @@ STAGE PLANS: Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToVarChar[13:VarChar], CastLongToVarChar[14:VarChar], CastLongToVarChar[15:VarChar], CastLongToVarChar[16:VarChar], VectorUDFAdaptor[17:varchar(20)], VectorUDFAdaptor[18:varchar(20)], CastStringGroupToVarChar[19:VarChar] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: name: default.varchar_lazy_binary_columnar Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out index 9c2c536..09259c8 100644 --- ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_varchar_mapjoin1.q.out @@ -125,11 +125,15 @@ POSTHOOK: Output: default@varchar_join1_str_orc POSTHOOK: Lineage: varchar_join1_str_orc.c1 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: varchar_join1_str_orc.c2 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join varchar with same length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with same length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -172,6 +176,14 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -192,8 +204,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: 
false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(10)) @@ -225,11 +252,15 @@ POSTHOOK: Input: default@varchar_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join varchar with different length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with different length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -272,6 +303,14 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -292,8 +331,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(20)) @@ -327,11 +381,15 @@ POSTHOOK: Input: default@varchar_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join varchar with string -explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with string -explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] 
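The PLAN VECTORIZATION, Map Vectorization, and Reduce Vectorization summaries throughout these outputs all follow the same pattern: a boolean gate plus the named conditions that were evaluated to reach it (enabledConditionsMet, nativeConditionsMet, nativeConditionsNotMet). A minimal sketch of that bookkeeping, assuming a hypothetical ConditionTracker helper rather than the actual org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer internals:

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical helper mirroring the enabledConditionsMet /
    // nativeConditionsNotMet lines in the explain output above;
    // a sketch only, not Hive's Vectorizer code.
    final class ConditionTracker {
        private final List<String> met = new ArrayList<>();
        private final List<String> notMet = new ArrayList<>();

        // Record one named check, e.g.
        // check("hive.vectorized.execution.enabled IS true", confValue).
        boolean check(String description, boolean satisfied) {
            (satisfied ? met : notMet).add(description);
            return satisfied;
        }

        // An operator counts as "native" only when every recorded condition held.
        boolean allMet() { return notMet.isEmpty(); }

        List<String> conditionsMet() { return met; }
        List<String> conditionsNotMet() { return notMet; }
    }

Under this reading, an operator such as the VectorReduceSinkOperator above reports native: false exactly when its tracker still holds an unmet condition (here, Uniform Hash IS false), while the met list is still printed so the failing check can be distinguished from checks that never ran.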
+ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -374,6 +432,14 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -394,8 +460,23 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out index edb67f1..e83dc27 100644 --- ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -84,8 +88,23 @@ STAGE PLANS: value expressions: _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) @@ -148,16 +167,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from varchar_2 order by key 
desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -187,8 +210,23 @@ STAGE PLANS: value expressions: _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) @@ -254,12 +292,16 @@ create table varchar_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert into table varchar_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert into table varchar_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -282,33 +324,87 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( _col0 AS varchar(25)) (type: varchar(25)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToVarChar[1:VarChar] + vectorized: true Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 5a3cfe4..73f66f6 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -20,12 +20,16 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@count_case_groupby POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,9 +50,23 @@ STAGE PLANS: Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], VectorUDFAdaptor[3:Long](NotCol[2:boolean]) + vectorized: true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true 
keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -57,21 +75,54 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index faceb5c..bcb6a2d 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -1,7 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -12,7 +12,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -41,40 +45,105 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFCount(IdentityExpression[0:tinyint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFMaxLong(IdentityExpression[1:tinyint]), VectorUDAFCountMerge(IdentityExpression[2:bigint]), VectorUDAFCountMerge(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:tinyint], IdentityExpression[2:bigint], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -106,16 +175,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -137,39 +210,104 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + 
vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -195,7 +333,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39856 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -208,7 +346,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -221,6 +359,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,8 +396,21 @@ STAGE PLANS: value 
expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -269,6 +424,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -317,7 +479,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -325,7 +487,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -333,6 +495,10 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -354,40 +520,105 @@ STAGE PLANS: Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[3:bigint]), VectorUDAFMaxLong(IdentityExpression[3:bigint]), VectorUDAFCount(IdentityExpression[3:bigint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: 
false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:bigint]), VectorUDAFMaxLong(IdentityExpression[1:bigint]), VectorUDAFCountMerge(IdentityExpression[2:bigint]), VectorUDAFCountMerge(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[0:bigint], IdentityExpression[1:bigint], IdentityExpression[2:bigint], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -419,16 +650,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -450,39 +685,104 @@ STAGE PLANS: Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(cbigint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -508,7 +808,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1698460028409 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -521,7 +821,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -534,6 +834,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -567,8 +871,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ 
-582,6 +899,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -630,7 +954,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -638,7 +962,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -646,6 +970,10 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -667,40 +995,105 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(IdentityExpression[4:float]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFCount(IdentityExpression[4:float]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(IdentityExpression[0:float]), VectorUDAFMaxDouble(IdentityExpression[1:float]), VectorUDAFCountMerge(IdentityExpression[2:bigint]), VectorUDAFCountMerge(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:float], IdentityExpression[1:float], IdentityExpression[2:bigint], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -732,16 +1125,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a 
root stage Stage-0 depends on stages: Stage-1 @@ -763,39 +1160,104 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(IdentityExpression[0:double]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -821,7 +1283,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39479.635992884636 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -834,7 +1296,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -847,6 +1309,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -880,8 +1346,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -895,6 +1374,13 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) @@ -944,7 +1430,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. 
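[Illustrative aside for reviewers, not part of the patch.] The conditions echoed in the plans above map directly onto session settings: every enabledConditionsMet / nativeConditionsMet entry names one. A minimal sketch of a session that reproduces this kind of output follows; the SET statements and the CLI session are assumptions inferred from the condition names printed in these plans (they match the test harness defaults implied here, but are not shown in the patch itself), while the query is copied from the SUM(cfloat) hunk above.

    -- Settings named verbatim in the enabledConditionsMet / nativeConditionsMet
    -- entries of the plans above (assumed to be the harness defaults):
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    SET hive.vectorized.use.vectorized.input.format=true;

    -- Same query as the SUM(cfloat) hunk above; DETAIL adds the per-operator
    -- "* Vectorization:" stanzas and expression-level class names.
    EXPLAIN VECTORIZATION DETAIL
    SELECT SUM(cfloat) as c1
    FROM alltypesorc
    ORDER BY c1;

Reading the result: the PLAN VECTORIZATION stanza reports the plan-level decision, the per-vertex Map Vectorization / Reduce Vectorization stanzas report why each vertex did or did not vectorize (see the notVectorizedReason lines on the avg/variance reducers above, where the struct-typed partial aggregate blocks reduce-side vectorization), and DETAIL additionally prints the chosen operator and expression classes (VectorSelectOperator, VectorUDAFSumDouble, IdentityExpression, and so on).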
-PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -971,7 +1457,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -998,6 +1484,10 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1016,14 +1506,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterStringColLikeStringScalar[-1:boolean] FilterDecimalScalarNotEqualDecimalColumn[-1:boolean](CastLongToDecimal[12:decimal(13,3)]) FilterDoubleColLessDoubleColumn[-1:boolean](CastLongToDouble[13:double]) FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterLongColEqualLongScalar[-1:boolean] FilterLongScalarEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]))) + vectorized: true predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint], IdentityExpression[4:float], IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(IdentityExpression[3:bigint]), VectorUDAFStdPopLong(IdentityExpression[3:bigint]), VectorUDAFVarSampLong(IdentityExpression[3:bigint]), VectorUDAFCountStar, VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE @@ -1033,8 +1543,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), 
_col5 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index 8cf503f..267f90e 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -35,7 +35,7 @@ LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -87,14 +91,35 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean] FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean] FilterLongColNotEqualLongScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(11,4)]))) + vectorized: true predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + 
vectorized: true Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFStdPopDouble(IdentityExpression[4:float]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -107,8 +132,21 @@ STAGE PLANS: value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -127,16 +165,39 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: 
float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:tinyint], IdentityExpression[2:timestamp], IdentityExpression[3:float], IdentityExpression[4:string], IdentityExpression[5:tinyint], IdentityExpression[6:tinyint], IdentityExpression[7:tinyint], IdentityExpression[8:double], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:float], IdentityExpression[12:double], IdentityExpression[10:double], IdentityExpression[14:double], IdentityExpression[15:decimal(7,3)], IdentityExpression[16:double], IdentityExpression[17:double], IdentityExpression[18:float], IdentityExpression[19:double], IdentityExpression[20:tinyint] + vectorized: true Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -258,7 +319,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -292,7 +353,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -325,6 +386,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -344,14 +409,35 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean] FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean] FilterLongColNotEqualLongScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) 
FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(11,4)]))) + vectorized: true predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFStdPopDouble(IdentityExpression[4:float]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -364,8 +450,21 @@ STAGE PLANS: value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -384,16 +483,39 @@ STAGE PLANS: TopN Hash Memory 
Usage: 0.1 Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:tinyint], IdentityExpression[2:timestamp], IdentityExpression[3:float], IdentityExpression[4:string], IdentityExpression[5:tinyint], IdentityExpression[6:tinyint], IdentityExpression[7:tinyint], IdentityExpression[8:double], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:float], IdentityExpression[12:double], IdentityExpression[10:double], IdentityExpression[14:double], IdentityExpression[15:decimal(7,3)], IdentityExpression[16:double], IdentityExpression[17:double], IdentityExpression[18:float], IdentityExpression[19:double], IdentityExpression[20:tinyint] + vectorized: true Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index c227e44..e6fdca9 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -68,6 +68,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 
POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -107,8 +111,21 @@ STAGE PLANS: value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) @@ -127,6 +144,13 @@ STAGE PLANS: value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 768aed4..a9908a4 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -64,6 +64,10 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, 
ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -103,8 +107,21 @@ STAGE PLANS: value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) @@ -123,6 +140,13 @@ STAGE PLANS: value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index a1eb629..22041cc 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] 
+ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -80,8 +84,21 @@ STAGE PLANS: value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 3a77cc9..007ce8f 100644 --- ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -50,6 +50,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,8 +85,23 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out index 54cc498..37cafd6 100644 --- ql/src/test/results/clientpositive/llap/vectorization_7.q.out 
+++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -29,7 +29,7 @@ LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -56,6 +56,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -74,31 +78,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColNotEqualLongScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprOrExpr[-1:boolean](FilterDoubleScalarLessDoubleColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean]))) + vectorized: true predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[6:string], LongColAddLongColumn[13:long], LongColModuloLongScalar[14:long](IdentityExpression[1:smallint]), LongColUnaryMinus[15:long], LongColUnaryMinus[16:long], LongColAddLongScalar[18:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColMultiplyLongColumn[19:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColModuloLongColumn[17:long](IdentityExpression[1:smallint]), LongColUnaryMinus[20:long], LongColModuloLongColumn[22:long](LongColUnaryMinus[21:long]) + vectorized: true Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:bigint], IdentityExpression[2:smallint], IdentityExpression[3:tinyint], IdentityExpression[4:timestamp], IdentityExpression[5:string], IdentityExpression[6:bigint], IdentityExpression[7:int], IdentityExpression[8:smallint], IdentityExpression[9:tinyint], IdentityExpression[10:int], IdentityExpression[11:bigint], IdentityExpression[12:int], IdentityExpression[9:tinyint], IdentityExpression[14:tinyint] + vectorized: true Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,7 +246,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -221,7 +274,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -248,6 +301,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -266,31 +323,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColNotEqualLongScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprOrExpr[-1:boolean](FilterDoubleScalarLessDoubleColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean]))) + vectorized: true predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[6:string], LongColAddLongColumn[13:long], LongColModuloLongScalar[14:long](IdentityExpression[1:smallint]), LongColUnaryMinus[15:long], LongColUnaryMinus[16:long], LongColAddLongScalar[18:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), 
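
The "double compare timestamp" variants above differ only in their literals; both lean on Hive's implicit timestamp-to-double conversion, which the vectorized plan surfaces as CastTimestampToDouble. A standalone illustration (the explicit CAST form is an assumed equivalent, not taken from these tests):

    -- Comparing a timestamp with a numeric literal casts the timestamp to
    -- seconds-since-epoch as DOUBLE, matching the
    -- UDFToDouble(ctimestamp1) <= 0.0 form in the predicates above.
    SELECT COUNT(*)
    FROM alltypesorc
    WHERE CAST(ctimestamp1 AS DOUBLE) <= 0.0;
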
LongColMultiplyLongColumn[19:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColModuloLongColumn[17:long](IdentityExpression[1:smallint]), LongColUnaryMinus[20:long], LongColModuloLongColumn[22:long](LongColUnaryMinus[21:long]) + vectorized: true Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:bigint], IdentityExpression[2:smallint], IdentityExpression[3:tinyint], IdentityExpression[4:timestamp], IdentityExpression[5:string], IdentityExpression[6:bigint], IdentityExpression[7:int], IdentityExpression[8:smallint], IdentityExpression[9:tinyint], IdentityExpression[10:int], IdentityExpression[11:bigint], IdentityExpression[12:int], IdentityExpression[9:tinyint], IdentityExpression[14:tinyint] + vectorized: true Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num 
rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_8.q.out ql/src/test/results/clientpositive/llap/vectorization_8.q.out index 70ef835..bc14376 100644 --- ql/src/test/results/clientpositive/llap/vectorization_8.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_8.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -27,7 +27,7 @@ LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -70,31 +74,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColLessDoubleScalar[-1:boolean] FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColEqualDoubleScalar[-1:boolean])) + vectorized: true predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[5:double], IdentityExpression[10:boolean], IdentityExpression[6:string], IdentityExpression[4:float], DoubleColUnaryMinus[12:double], DoubleScalarSubtractDoubleColumn[13:double], DoubleColMultiplyDoubleScalar[14:double], DoubleColAddDoubleColumn[16:double](CastLongToFloatViaLongToDouble[15:double]), 
DoubleColAddDoubleColumn[18:double](DoubleColUnaryMinus[15:double] CastLongToDouble[17:double]), DoubleColUnaryMinus[15:double], DoubleScalarSubtractDoubleColumn[17:double], DoubleColUnaryMinus[19:double], DoubleColAddDoubleColumn[21:double](DoubleScalarSubtractDoubleColumn[20:double] IdentityExpression[22:double](DoubleColAddDoubleColumn[22:double](CastLongToFloatViaLongToDouble[21:double]))) + vectorized: true Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:double], IdentityExpression[2:boolean], IdentityExpression[3:string], IdentityExpression[4:float], IdentityExpression[5:double], IdentityExpression[6:double], IdentityExpression[7:double], IdentityExpression[8:float], IdentityExpression[9:double], IdentityExpression[5:double], IdentityExpression[11:float], IdentityExpression[12:float], IdentityExpression[13:double] + vectorized: true Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS 
true + vectorized: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -180,7 +233,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -206,7 +259,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -231,6 +284,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -249,31 +306,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColLessDoubleScalar[-1:boolean] FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColEqualDoubleScalar[-1:boolean])) + vectorized: true predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[5:double], IdentityExpression[10:boolean], IdentityExpression[6:string], IdentityExpression[4:float], DoubleColUnaryMinus[12:double], DoubleScalarSubtractDoubleColumn[13:double], 
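
On the recurring "nativeConditionsNotMet: No TopN IS false" above: the ORDER BY ... LIMIT pattern in these tests attaches a TopN hash to the reduce sink, which (together with the failed Uniform Hash check) keeps the sink off the native vectorized path. An assumption for illustration, not verified against this patch: the same query without the LIMIT carries no TopN, so that particular condition no longer fails.

    -- Same shape as the tests above, minus the LIMIT -- and hence minus
    -- "TopN Hash Memory Usage" and the failed "No TopN" condition.
    EXPLAIN VECTORIZATION DETAIL
    SELECT ctimestamp1, cdouble, cboolean1
    FROM alltypesorc
    WHERE cfloat < -6432
    ORDER BY ctimestamp1, cdouble, cboolean1;
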
DoubleColMultiplyDoubleScalar[14:double], DoubleColAddDoubleColumn[16:double](CastLongToFloatViaLongToDouble[15:double]), DoubleColAddDoubleColumn[18:double](DoubleColUnaryMinus[15:double] CastLongToDouble[17:double]), DoubleColUnaryMinus[15:double], DoubleScalarSubtractDoubleColumn[17:double], DoubleColUnaryMinus[19:double], DoubleColAddDoubleColumn[21:double](DoubleScalarSubtractDoubleColumn[20:double] IdentityExpression[22:double](DoubleColAddDoubleColumn[22:double](CastLongToFloatViaLongToDouble[21:double]))) + vectorized: true Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:double], IdentityExpression[2:boolean], IdentityExpression[3:string], IdentityExpression[4:float], IdentityExpression[5:double], IdentityExpression[6:double], IdentityExpression[7:double], IdentityExpression[8:float], IdentityExpression[9:double], IdentityExpression[5:double], IdentityExpression[11:float], IdentityExpression[12:float], IdentityExpression[13:double] + vectorized: true Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number 
of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index a1eb629..22041cc 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -80,8 +84,21 @@ STAGE PLANS: value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) diff --git ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out index 9a6cb52..d67ef1f 100644 --- ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
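
The vectorization_9 hunk above is the first in this series where the reducer reports vectorized: false, and the notVectorizedReason spells out why: avg/stddev/variance ship a struct-typed partial aggregate to the reduce side, and the vectorized GROUP BY cannot consume a struct column. A pared-down contrast, assuming the same table (the MIN case staying vectorized is inferred from the other plans in this patch, not retested here):

    -- Struct partial => reduce-side GROUP BY falls back to row mode.
    EXPLAIN VECTORIZATION
    SELECT cstring1, STDDEV_SAMP(cdouble)
    FROM alltypesorc
    GROUP BY cstring1;

    -- Primitive partial => reduce side can stay vectorized.
    EXPLAIN VECTORIZATION
    SELECT cstring1, MIN(cdouble)
    FROM alltypesorc
    GROUP BY cstring1;
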
POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index 2078e81..872e7f3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,8 +82,23 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out 
ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out index 6bd4bd6..6324e01 100644 --- ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -39,8 +43,21 @@ STAGE PLANS: value expressions: _col0 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index a14d515..8eee9d8 100644 --- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -134,6 +136,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -141,59 +147,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((762 
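
The loss-of-precision warning retained above is triggered by "WHERE cbigint < cdouble": the bigint side is implicitly cast to double, which is exact only up to 2^53. An explicit rewrite that makes the planner's conversion visible (illustrative only, not part of this patch):

    -- Equivalent to the implicit cast the comparison inserts; values of
    -- cbigint beyond 2^53 cannot be represented exactly as DOUBLE.
    SELECT AVG(cbigint)
    FROM alltypesorc
    WHERE CAST(cbigint AS DOUBLE) < cdouble;
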
= cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) - Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) - outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint - Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongScalarEqualLongColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleColumn[-1:boolean](CastLongToFloatViaLongToDouble[12:double]) FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleColumn[-1:boolean](CastLongToDouble[12:double])) FilterStringGroupColEqualStringScalar[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDecimalColLessEqualDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(22,3)]) FilterStringGroupColNotEqualStringScalar[-1:boolean] FilterDecimalScalarNotEqualDecimalColumn[-1:boolean](CastLongToDecimal[14:decimal(13,3)]) FilterLongColNotEqualLongColumn[-1:boolean])) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int], IdentityExpression[5:double], IdentityExpression[1:smallint], IdentityExpression[4:float], IdentityExpression[0:tinyint] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFAvgLong(IdentityExpression[2:int]), VectorUDAFSumDouble(IdentityExpression[5:double]), VectorUDAFStdPopLong(IdentityExpression[2:int]), VectorUDAFStdSampLong(IdentityExpression[1:smallint]), VectorUDAFVarSampLong(IdentityExpression[2:int]), VectorUDAFAvgDouble(IdentityExpression[4:float]), VectorUDAFStdSampLong(IdentityExpression[2:int]), VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFCount(IdentityExpression[1:smallint]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of 
VectorUDAFStdSampLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT AVG(cint), (AVG(cint) + -3728), @@ -277,7 +274,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN 
VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -315,7 +313,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -348,6 +347,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -355,59 +358,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2036734 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) - Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) - outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint - Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterLongColLessEqualLongScalar[-1:boolean] FilterLongColLessLongColumn[-1:boolean](IdentityExpression[2:int])) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterEqualDoubleScalar[-1:boolean] FilterDoubleColGreaterDoubleColumn[-1:boolean](CastLongToDouble[12:double])) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleColumn[-1:boolean](CastLongToFloatViaLongToDouble[12:double]) FilterStringColRegExpStringScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean] FilterStringColLikeStringScalar[-1:boolean])) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int], 
IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[5:double], IdentityExpression[0:tinyint] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[2:int]), VectorUDAFVarPopLong(IdentityExpression[3:bigint]), VectorUDAFStdPopLong(IdentityExpression[1:smallint]), VectorUDAFMaxDouble(IdentityExpression[5:double]), VectorUDAFAvgLong(IdentityExpression[0:tinyint]), VectorUDAFMinLong(IdentityExpression[2:int]), VectorUDAFMinDouble(IdentityExpression[5:double]), VectorUDAFStdSampLong(IdentityExpression[1:smallint]), VectorUDAFVarSampLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - File Output 
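
Unlike the earlier files, vectorization_short_regress.q.out switches to EXPLAIN VECTORIZATION ONLY DETAIL, so the hunks above also delete the regular plan body (TableScan, Statistics, the masked patterns) and keep nothing but the vectorization summaries. The minimal form of that request, with an assumed query shape:

    -- ONLY suppresses the non-vectorization parts of the plan; DETAIL keeps
    -- the per-operator expression dumps.
    EXPLAIN VECTORIZATION ONLY DETAIL
    SELECT MAX(cint), MIN(cint)
    FROM alltypesorc
    WHERE cbigint <= 197;
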
Operator - compressed: false - Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT MAX(cint), (MAX(cint) / -3728), @@ -485,7 +479,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -522,7 +517,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -554,6 +550,10 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -561,59 +561,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) - outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterTimestampColEqualTimestampColumn[-1:boolean] 
FilterDoubleScalarEqualDoubleColumn[-1:boolean] FilterStringGroupColEqualStringScalar[-1:boolean] FilterExprAndExpr[-1:boolean](FilterLongColLessEqualLongColumn[-1:boolean](IdentityExpression[1:smallint]) FilterLongScalarEqualLongColumn[-1:boolean]) FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean] FilterStringGroupColGreaterStringScalar[-1:boolean])) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint], IdentityExpression[0:tinyint], IdentityExpression[1:smallint], IdentityExpression[2:int], IdentityExpression[5:double] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(IdentityExpression[3:bigint]), VectorUDAFCountStar, VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFStdPopLong(IdentityExpression[1:smallint]), VectorUDAFMaxLong(IdentityExpression[2:int]), VectorUDAFStdSampDouble(IdentityExpression[5:double]), VectorUDAFCount(IdentityExpression[0:tinyint]), VectorUDAFAvgLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(IdentityExpression[5:double]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % 
(UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), @@ -690,7 +681,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -717,7 +709,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -739,6 +732,10 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -746,59 +743,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) - Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cint, cfloat - Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + 
className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterTimestampColLessEqualTimestampColumn[-1:boolean] FilterDoubleColNotEqualDoubleColumn[-1:boolean](CastLongToDouble[12:double]) FilterStringScalarLessEqualStringGroupColumn[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterLongColLessLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterDoubleColGreaterEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColEqualDoubleScalar[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[3:bigint], IdentityExpression[2:int], IdentityExpression[4:float] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFAvgLong(IdentityExpression[0:tinyint]), VectorUDAFMaxLong(IdentityExpression[3:bigint]), VectorUDAFStdSampLong(IdentityExpression[2:int]), VectorUDAFVarPopLong(IdentityExpression[2:int]), VectorUDAFVarPopLong(IdentityExpression[3:bigint]), VectorUDAFMaxDouble(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator 
- compressed: false - Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT AVG(ctinyint), @@ -855,7 +843,8 @@ PREHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -896,7 +885,8 @@ POSTHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -932,6 +922,10 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -939,53 +933,66 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 3056470 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) - Statistics: Num rows: 9898 Data size: 2462086 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(20,18)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: 
Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(20,18)) - sort order: +++++++++++++++++++++++ - Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterStringColRegExpStringScalar[-1:boolean] FilterStringColLikeStringScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterLongScalarNotEqualLongColumn[-1:boolean] FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[12:decimal(8,3)]) FilterLongScalarNotEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint])) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleColumn[-1:boolean](CastLongToDouble[13:double]) FilterDoubleColGreaterEqualDoubleColumn[-1:boolean](CastLongToFloatViaLongToDouble[13:double])) FilterExprAndExpr[-1:boolean](FilterLongColLessLongColumn[-1:boolean](IdentityExpression[2:int]) FilterLongColGreaterLongColumn[-1:boolean](IdentityExpression[0:tinyint]))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int], IdentityExpression[5:double], IdentityExpression[9:timestamp], IdentityExpression[6:string], IdentityExpression[11:boolean], IdentityExpression[0:tinyint], IdentityExpression[4:float], IdentityExpression[8:timestamp], IdentityExpression[1:smallint], IdentityExpression[3:bigint], LongScalarMultiplyLongColumn[14:long], LongColUnaryMinus[15:long], DecimalScalarSubtractDecimalColumn[17:decimal(14,3)](CastLongToDecimal[16:decimal(10,0)]), LongColUnaryMinus[18:long], LongColSubtractLongColumn[20:long](LongColUnaryMinus[19:long]), LongColAddLongColumn[22:long](LongColSubtractLongColumn[21:long](LongColUnaryMinus[19:long]) LongColUnaryMinus[19:long]), DoubleColDivideDoubleColumn[24:double](CastLongToDouble[13:double] CastLongToDouble[23:double]), DecimalColSubtractDecimalScalar[26:decimal(15,3)](DecimalScalarSubtractDecimalColumn[25:decimal(14,3)](CastLongToDecimal[16:decimal(10,0)])), DoubleColUnaryMinus[13:double], DoubleColMultiplyDoubleScalar[23:double], DoubleColDivideDoubleScalar[28:double](CastLongToDouble[27:double]), LongColUnaryMinus[19:long], DecimalScalarDivideDecimalColumn[30:decimal(20,18)](CastLongToDecimal[29:decimal(3,0)]) + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS 
false, Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(20,18)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:double], IdentityExpression[2:timestamp], IdentityExpression[3:string], IdentityExpression[4:boolean], IdentityExpression[5:tinyint], IdentityExpression[6:float], IdentityExpression[7:timestamp], IdentityExpression[8:smallint], IdentityExpression[9:bigint], IdentityExpression[10:bigint], IdentityExpression[11:int], IdentityExpression[12:decimal(14,3)], IdentityExpression[13:smallint], IdentityExpression[14:smallint], IdentityExpression[15:smallint], IdentityExpression[16:double], IdentityExpression[17:decimal(15,3)], IdentityExpression[18:float], IdentityExpression[19:double], IdentityExpression[20:double], IdentityExpression[21:tinyint], IdentityExpression[22:decimal(20,18)] + vectorized: true + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 50 Processor Tree: - ListSink PREHOOK: query: SELECT cint, cdouble, @@ 
-1118,7 +1125,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -1158,7 +1166,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -1193,6 +1202,10 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1200,53 +1213,66 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) - Statistics: Num rows: 8195 Data size: 1735170 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: 
double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) - sort order: +++++++++++++++++++++++++ - Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterLongScalarGreaterLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[2:int])) FilterLongColEqualLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean] FilterExprAndExpr[-1:boolean](FilterStringColLikeStringScalar[-1:boolean] FilterDoubleColLessEqualDoubleColumn[-1:boolean](CastLongToFloatViaLongToDouble[12:double]))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int], IdentityExpression[3:bigint], IdentityExpression[6:string], IdentityExpression[10:boolean], IdentityExpression[4:float], IdentityExpression[5:double], IdentityExpression[9:timestamp], IdentityExpression[1:smallint], IdentityExpression[7:string], IdentityExpression[11:boolean], DoubleColDivideDoubleColumn[14:double](CastLongToDouble[12:double] CastLongToDouble[13:double]), DecimalColModuloDecimalScalar[16:decimal(5,3)](CastLongToDecimal[15:decimal(19,0)]), DoubleColUnaryMinus[12:double](DoubleColDivideDoubleColumn[17:double](CastLongToDouble[12:double] CastLongToDouble[13:double])), DoubleScalarModuloDoubleColumn[13:double], DoubleColUnaryMinus[17:double], DoubleColSubtractDoubleColumn[19:double](DoubleColUnaryMinus[18:double]), DoubleColModuloDoubleScalar[18:double](DoubleColSubtractDoubleColumn[20:double](DoubleColUnaryMinus[18:double])), DoubleColMultiplyDoubleColumn[21:double](CastLongToDouble[20:double]), DoubleColUnaryMinus[20:double], LongColUnaryMinus[22:long], DoubleColSubtractDoubleColumn[23:double](IdentityExpression[4:float] DoubleColDivideDoubleColumn[25:double](CastLongToDouble[23:double] CastLongToDouble[24:double])), LongColUnaryMinus[26:long], LongScalarModuloLongColumn[27:long], DoubleScalarSubtractDoubleColumn[24:double], LongColUnaryMinus[28:long] + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - 
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:bigint], IdentityExpression[2:string], IdentityExpression[3:boolean], IdentityExpression[4:float], IdentityExpression[5:double], IdentityExpression[6:timestamp], IdentityExpression[7:smallint], IdentityExpression[8:string], IdentityExpression[9:boolean], IdentityExpression[10:double], IdentityExpression[11:decimal(5,3)], IdentityExpression[12:double], IdentityExpression[13:float], IdentityExpression[14:float], IdentityExpression[15:float], IdentityExpression[16:float], IdentityExpression[17:double], IdentityExpression[18:double], IdentityExpression[19:bigint], IdentityExpression[20:double], IdentityExpression[21:smallint], IdentityExpression[22:bigint], IdentityExpression[23:double], IdentityExpression[21:smallint] + vectorized: true + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 25 Processor Tree: - ListSink PREHOOK: query: SELECT cint, cbigint, @@ -1330,7 +1356,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1369,7 +1396,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, 
GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1403,6 +1431,10 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1410,54 +1442,66 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) - Statistics: Num rows: 10922 Data size: 2312410 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(19,18)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(19,18)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) - sort order: +++++++++++++++++++++++ - Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDecimalColGreaterDecimalScalar[-1:boolean](CastLongToDecimal[12:decimal(7,2)]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColLessEqualDoubleColumn[-1:boolean](CastLongToDouble[13:double]) FilterStringGroupColGreaterEqualStringScalar[-1:boolean] FilterDoubleColNotEqualDoubleColumn[-1:boolean](CastLongToDouble[13:double])) FilterLongColEqualLongScalar[-1:boolean](IdentityExpression[0:tinyint]) FilterExprAndExpr[-1:boolean](FilterDoubleColLessEqualDoubleColumn[-1:boolean](CastLongToFloatViaLongToDouble[13:double]) FilterDecimalScalarLessEqualDecimalColumn[-1:boolean](CastLongToDecimal[12:decimal(7,2)]))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int], IdentityExpression[6:string], IdentityExpression[11:boolean], IdentityExpression[9:timestamp], IdentityExpression[5:double], IdentityExpression[4:float], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[10:boolean], LongColAddLongColumn[14:long](IdentityExpression[1:smallint]), LongColSubtractLongColumn[15:long](IdentityExpression[0:tinyint]), LongColUnaryMinus[16:long], DoubleColUnaryMinus[13:double], LongColAddLongColumn[18:long](LongColSubtractLongColumn[17:long](IdentityExpression[0:tinyint])), DoubleColDivideDoubleColumn[19:double], DoubleColUnaryMinus[20:double], LongColMultiplyLongColumn[22:long](IdentityExpression[17:long](LongColAddLongColumn[17:long](IdentityExpression[1:smallint])) LongColUnaryMinus[21:long]), DoubleColAddDoubleColumn[25:double](DoubleColUnaryMinus[23:double] CastLongToDouble[24:double]), DecimalScalarDivideDecimalColumn[27:decimal(19,18)](CastLongToDecimal[26:decimal(3,0)]), DoubleColModuloDoubleColumn[24:double](CastLongToDouble[23:double]), LongColUnaryMinus[17:long], LongColAddLongColumn[28:long](IdentityExpression[1:smallint] LongColAddLongColumn[21:long](IdentityExpression[1:smallint])) + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: 
bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(19,18)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 75 - Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[7:int], IdentityExpression[1:string], IdentityExpression[23:boolean], IdentityExpression[2:timestamp], IdentityExpression[6:double], IdentityExpression[3:float], IdentityExpression[4:bigint], IdentityExpression[8:smallint], IdentityExpression[0:boolean], IdentityExpression[10:int], IdentityExpression[11:bigint], IdentityExpression[12:bigint], IdentityExpression[13:float], IdentityExpression[14:bigint], IdentityExpression[15:double], IdentityExpression[16:double], IdentityExpression[17:bigint], IdentityExpression[18:double], IdentityExpression[19:decimal(19,18)], IdentityExpression[20:double], IdentityExpression[21:smallint], IdentityExpression[22:int] + vectorized: true + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 75 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
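Note on the predicateExpression trees printed above (for example, FilterExprOrExpr wrapping FilterExprAndExpr and leaf comparisons such as FilterDoubleColGreaterEqualDoubleScalar): these describe filters that run over whole column batches and pass the surviving rows along through a selection vector rather than evaluating row by row; the [-1:boolean] suffix is consistent with a filter projecting no output column and only narrowing the selection. The following is a minimal, self-contained sketch of that evaluation model, assuming nothing beyond the plan output shown here. It is illustrative only: Batch, Filter, ColGreaterScalar, AndFilter, and OrFilter are names invented for this example, not Hive's actual VectorizedRowBatch or VectorExpression classes.

import java.util.Arrays;

// Illustrative sketch of selection-vector filtering (invented classes, not Hive's).
// A "batch" holds parallel column arrays plus a selection vector listing the
// row indices that are still in play after each filter has been applied.
final class Batch {
    final double[] doubleCol;  // one primitive column, by loose analogy with a double column vector
    int[] selected;            // indices of rows that have passed the filters so far
    int size;                  // number of valid entries in 'selected'

    Batch(double[] col) {
        this.doubleCol = col;
        this.selected = new int[col.length];
        for (int i = 0; i < col.length; i++) selected[i] = i;
        this.size = col.length;
    }
}

interface Filter {
    // Shrinks batch.selected in place; the filter produces no output column.
    void filter(Batch batch);
}

// Leaf filter: col > scalar, in the spirit of FilterDoubleColGreaterDoubleScalar.
final class ColGreaterScalar implements Filter {
    private final double scalar;
    ColGreaterScalar(double scalar) { this.scalar = scalar; }
    @Override public void filter(Batch b) {
        int newSize = 0;
        for (int j = 0; j < b.size; j++) {
            int row = b.selected[j];
            if (b.doubleCol[row] > scalar) b.selected[newSize++] = row;
        }
        b.size = newSize;  // only survivors remain selected
    }
}

// AND: apply children in sequence, so each child only sees the rows that
// survived the previous one -- batch-level short-circuiting.
final class AndFilter implements Filter {
    private final Filter[] children;
    AndFilter(Filter... children) { this.children = children; }
    @Override public void filter(Batch b) {
        for (Filter f : children) f.filter(b);
    }
}

// OR: run each child against the original selection and keep the union,
// preserving the original row order in the result.
final class OrFilter implements Filter {
    private final Filter[] children;
    OrFilter(Filter... children) { this.children = children; }
    @Override public void filter(Batch b) {
        int[] original = Arrays.copyOf(b.selected, b.size);
        int originalSize = b.size;
        boolean[] keep = new boolean[b.doubleCol.length];
        for (Filter f : children) {
            b.selected = Arrays.copyOf(original, originalSize);
            b.size = originalSize;
            f.filter(b);
            for (int j = 0; j < b.size; j++) keep[b.selected[j]] = true;
        }
        int newSize = 0;
        for (int j = 0; j < originalSize; j++) {
            if (keep[original[j]]) original[newSize++] = original[j];
        }
        b.selected = original;
        b.size = newSize;
    }
}

Under this model a plan line such as FilterExprOrExpr(FilterExprAndExpr(...) ...) corresponds to an OrFilter over AndFilters applied once per batch (Hive's vectorized row batches default to 1024 rows). The group-by entries in these plans also show where this pipeline currently stops in this changeset: avg and the variance family emit struct-typed partial aggregates (a count plus running sums), and because vectorized batches carry only primitive column vectors, the plan reports "output type STRUCT requires PRIMITIVE IS false" and the reduce-side merge falls back to row mode (vectorized: false).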
PREHOOK: query: SELECT cint, @@ -1612,7 +1656,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1644,7 +1689,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1671,6 +1717,10 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1678,54 +1728,66 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2528254 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) - Statistics: Num rows: 3868 Data size: 795962 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) - sort order: +++++++++++++++ - Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: timestamp) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDecimalScalarGreaterEqualDecimalColumn[-1:boolean](CastLongToDecimal[12:decimal(13,3)]) FilterLongColLessLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterLongScalarGreaterLongColumn[-1:boolean](IdentityExpression[1:smallint])) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterEqualDoubleColumn[-1:boolean](IdentityExpression[4:float]) FilterStringGroupColLessEqualStringScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterStringColLikeStringScalar[-1:boolean] FilterDecimalScalarGreaterDecimalColumn[-1:boolean](CastLongToDecimal[13:decimal(22,3)]))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[7:string], IdentityExpression[5:double], IdentityExpression[4:float], IdentityExpression[3:bigint], IdentityExpression[1:smallint], DoubleColDivideDoubleScalar[15:double](CastLongToDouble[14:double]), LongScalarSubtractLongColumn[16:long](IdentityExpression[1:smallint]), DoubleScalarMultiplyDoubleColumn[14:double], DoubleColUnaryMinus[17:double], DoubleColMultiplyDoubleScalar[18:double], DoubleColDivideDoubleColumn[20:double](IdentityExpression[19:double](DoubleScalarMultiplyDoubleColumn[19:double]) IdentityExpression[4:float]), DoubleColUnaryMinus[19:double], LongColModuloLongColumn[21:long](IdentityExpression[1:smallint]), DoubleColUnaryMinus[22:double], DoubleColMultiplyDoubleColumn[24:double](DoubleColUnaryMinus[23:double]) + vectorized: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 45 - Statistics: Num rows: 45 Data size: 8880 Basic 
stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[15:timestamp], IdentityExpression[1:string], IdentityExpression[2:double], IdentityExpression[3:float], IdentityExpression[4:bigint], IdentityExpression[0:smallint], IdentityExpression[5:double], IdentityExpression[6:int], IdentityExpression[7:float], IdentityExpression[8:double], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:float], IdentityExpression[12:int], IdentityExpression[8:double], IdentityExpression[14:double] + vectorized: true + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 45 Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cstring2, @@ -1835,7 +1897,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1860,7 +1923,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1880,6 +1944,10 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1887,79 +1955,77 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) - Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) - outputColumnNames: csmallint, cbigint, ctinyint - Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() - keys: csmallint (type: smallint) - mode: hash - outputColumnNames: 
_col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongScalar[-1:boolean](IdentityExpression[1:smallint]) FilterExprOrExpr[-1:boolean](FilterLongScalarEqualLongColumn[-1:boolean](IdentityExpression[1:smallint]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterEqualDoubleColumn[-1:boolean](CastLongToDouble[12:double]) FilterLongColLessEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint])))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], IdentityExpression[3:bigint], IdentityExpression[0:tinyint] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFStdSampLong(IdentityExpression[1:smallint]), VectorUDAFSumLong(IdentityExpression[3:bigint]), VectorUDAFVarPopLong(IdentityExpression[0:tinyint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[1:smallint] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) - keys: KEY._col0 (type: smallint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1128 Data size: 39468 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(19,18)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - 
Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(19,18)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) - sort order: +++++++++++ - Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reducer 3 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(19,18)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:smallint], IdentityExpression[1:int], IdentityExpression[2:double], IdentityExpression[3:decimal(19,18)], IdentityExpression[4:bigint], IdentityExpression[5:double], IdentityExpression[6:int], IdentityExpression[7:double], IdentityExpression[8:int], IdentityExpression[9:bigint], IdentityExpression[10:bigint] + vectorized: true + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 20 Processor Tree: - ListSink PREHOOK: query: SELECT csmallint, (csmallint % -75) as c1, @@ -2031,7 +2097,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2063,7 +2130,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * 
VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2090,6 +2158,10 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2097,76 +2169,72 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) - Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), cfloat (type: float) - outputColumnNames: cdouble, cfloat - Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) - keys: cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongColumn[-1:boolean](IdentityExpression[2:int]) FilterLongColLessLongColumn[-1:boolean](IdentityExpression[1:smallint]) FilterDoubleColLessDoubleScalar[-1:boolean]) FilterDecimalScalarEqualDecimalColumn[-1:boolean](CastLongToDecimal[12:decimal(6,2)]) FilterExprAndExpr[-1:boolean](FilterDoubleColLessEqualDoubleColumn[-1:boolean](CastLongToDouble[13:double]) FilterDecimalScalarGreaterDecimalColumn[-1:boolean](CastLongToDecimal[14:decimal(21,2)])))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:double], IdentityExpression[4:float] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFVarSampDouble(IdentityExpression[5:double]), VectorUDAFCount(IdentityExpression[4:float]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFVarPopDouble(IdentityExpression[5:double]), VectorUDAFStdPopDouble(IdentityExpression[5:double]), VectorUDAFSumDouble(IdentityExpression[5:double]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[5:double] + native: false + 
nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(IdentityExpression[5:double]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(IdentityExpression[5:double]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(IdentityExpression[5:double]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 870 Data size: 46968 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double) - outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double], IdentityExpression[1:double], IdentityExpression[2:double], IdentityExpression[3:double], IdentityExpression[4:bigint], IdentityExpression[5:double], IdentityExpression[6:double], IdentityExpression[7:double], IdentityExpression[8:double], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:double], IdentityExpression[12:double], IdentityExpression[13:double], IdentityExpression[14:double], IdentityExpression[13:double] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cdouble, @@ -2232,7 +2300,8 @@ PREHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2292,7 +2361,8 @@ POSTHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2347,6 +2417,10 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2354,79 +2428,77 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < 
UDFToFloat(cint))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) - keys: ctimestamp1 (type: timestamp), cstring1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 6144 Data size: 5199016 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string) - Statistics: Num rows: 6144 Data size: 5199016 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterLongScalarNotEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) SelectColumnIsNotNull[-1:boolean] FilterStringColRegExpStringScalar[-1:boolean] FilterDoubleScalarLessDoubleColumn[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterStringColLikeStringScalar[-1:boolean]) FilterDoubleColEqualDoubleColumn[-1:boolean](CastLongToDouble[12:double]) FilterExprAndExpr[-1:boolean](SelectColumnIsNull[-1:boolean] FilterDoubleColLessDoubleColumn[-1:boolean](CastLongToFloatViaLongToDouble[12:double])))) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[6:string], IdentityExpression[2:int], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[4:float], IdentityExpression[5:double] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFStdPopLong(IdentityExpression[2:int]), VectorUDAFAvgLong(IdentityExpression[1:smallint]), VectorUDAFCountStar, VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFVarSampLong(IdentityExpression[1:smallint]), VectorUDAFVarPopDouble(IdentityExpression[4:float]), VectorUDAFAvgLong(IdentityExpression[2:int]), VectorUDAFVarSampDouble(IdentityExpression[4:float]), 
VectorUDAFAvgDouble(IdentityExpression[4:float]), VectorUDAFMinDouble(IdentityExpression[5:double]), VectorUDAFVarPopLong(IdentityExpression[1:smallint]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFSumLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[8:timestamp], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 3072 Data size: 645716 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: 
double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(20,18)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(20,18)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) - sort order: +++++++++++++++++++++++++++++++++++++++ - Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reducer 3 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 
(type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(20,18)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:string], IdentityExpression[2:double], IdentityExpression[3:double], IdentityExpression[4:double], IdentityExpression[5:double], IdentityExpression[4:double], IdentityExpression[7:double], IdentityExpression[8:bigint], IdentityExpression[9:bigint], IdentityExpression[10:double], IdentityExpression[11:tinyint], IdentityExpression[12:double], IdentityExpression[13:double], IdentityExpression[14:double], IdentityExpression[15:double], IdentityExpression[16:double], IdentityExpression[17:double], IdentityExpression[18:double], IdentityExpression[19:double], IdentityExpression[20:double], IdentityExpression[21:double], IdentityExpression[22:double], IdentityExpression[23:double], IdentityExpression[24:double], IdentityExpression[25:double], IdentityExpression[26:double], IdentityExpression[27:tinyint], IdentityExpression[28:double], IdentityExpression[29:double], IdentityExpression[30:double], IdentityExpression[31:double], IdentityExpression[32:decimal(20,18)], IdentityExpression[33:double], IdentityExpression[34:bigint], IdentityExpression[35:double], IdentityExpression[36:bigint], IdentityExpression[8:bigint], IdentityExpression[38:double] + vectorized: true + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 50 Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cstring1, @@ -2597,7 +2669,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, 
GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2642,7 +2715,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2682,6 +2756,10 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2689,76 +2767,72 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) - outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint - Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) - keys: cboolean1 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 3 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 3 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - Execution mode: vectorized, llap - LLAP IO: all inputs + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleColumn[-1:boolean](CastLongToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean] FilterDecimalColLessEqualDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(22,3)])) 
FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongScalar[-1:boolean] SelectColumnIsNotNull[-1:boolean] FilterLongColGreaterEqualLongScalar[-1:boolean]) FilterStringColRegExpStringScalar[-1:boolean] FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) SelectColumnIsNull[-1:boolean])) SelectColumnIsNotNull[-1:boolean]) + vectorized: true + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[4:float], IdentityExpression[3:bigint], IdentityExpression[2:int], IdentityExpression[5:double], IdentityExpression[0:tinyint], IdentityExpression[1:smallint] + vectorized: true + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFSumLong(IdentityExpression[3:bigint]), VectorUDAFVarSampLong(IdentityExpression[2:int]), VectorUDAFAvgDouble(IdentityExpression[5:double]), VectorUDAFMinLong(IdentityExpression[3:bigint]), VectorUDAFVarPopLong(IdentityExpression[3:bigint]), VectorUDAFSumLong(IdentityExpression[2:int]), VectorUDAFStdSampLong(IdentityExpression[0:tinyint]), VectorUDAFStdPopLong(IdentityExpression[1:smallint]), VectorUDAFAvgLong(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(IdentityExpression[5:double]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[1:smallint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(IdentityExpression[2:int]) output type STRUCT requires PRIMITIVE IS false + vectorized: true + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 3 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: boolean), _col1 (type: float), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 
(type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (- _col1) (type: float), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double) - outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 - Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 - Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:float], IdentityExpression[2:float], IdentityExpression[3:double], IdentityExpression[4:bigint], IdentityExpression[5:decimal(23,3)], IdentityExpression[6:double], IdentityExpression[7:double], IdentityExpression[8:float], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:bigint], IdentityExpression[12:double], IdentityExpression[13:float], IdentityExpression[14:double], IdentityExpression[15:double], IdentityExpression[13:float], IdentityExpression[16:bigint], IdentityExpression[17:double], IdentityExpression[18:decimal(24,3)], IdentityExpression[19:decimal(25,3)], IdentityExpression[20:double], IdentityExpression[21:decimal(25,3)], IdentityExpression[22:double], IdentityExpression[23:double], IdentityExpression[24:double] + vectorized: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT cboolean1, MAX(cfloat), @@ -2856,12 +2930,16 @@ create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2880,28 +2958,73 @@ STAGE PLANS: alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2923,12 +3046,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2949,28 +3076,74 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3066,12 +3239,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3090,28 +3267,73 @@ STAGE PLANS: alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: 
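Both spellings of the new statement appear in these q files, and the hunks differ accordingly: explain vectorization detail keeps the ordinary operator tree and annotates each operator with a *Vectorization: block, while EXPLAIN VECTORIZATION ONLY DETAIL also strips the plan down to those annotations. A short session against the one-column test_count table created above (table and column names taken from this file):

    -- Full operator tree plus per-operator Vectorization: blocks
    EXPLAIN VECTORIZATION DETAIL
    SELECT count(i) FROM test_count;

    -- The same annotations with the rest of the plan suppressed
    EXPLAIN VECTORIZATION ONLY DETAIL
    SELECT count(i) FROM test_count;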
vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3133,12 +3355,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3159,28 +3385,74 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3202,12 +3474,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3228,28 +3504,74 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: 
VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3271,12 +3593,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3297,28 +3623,74 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial 
outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3340,12 +3712,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3366,28 +3742,74 @@ STAGE PLANS: Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[6:string]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator 
+ native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3409,12 +3831,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3435,28 +3861,74 @@ STAGE PLANS: Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[10:boolean]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index 6c6c6d6..317a4bf 100644 --- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,32 +122,70 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No 
buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -187,12 +229,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -211,16 +257,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -237,6 +302,10 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -285,7 +354,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileforma -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = 
b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box @@ -293,9 +362,13 @@ POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileform -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -314,16 +387,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -340,6 +432,10 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 6e13369..3f85fb7 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root 
stage Stage-0 depends on stages: Stage-1 @@ -49,14 +53,31 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean]) + vectorized: true predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], VectorUDFAdaptor[14:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]), VectorUDFAdaptor[15:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]) + vectorized: true Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -64,6 +85,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -113,7 +142,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select csmallint, case @@ -131,7 +160,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -149,6 +178,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -164,14 +197,31 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean]) + vectorized: true predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], VectorUDFAdaptor[14:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]), VectorUDFAdaptor[15:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]) + vectorized: true Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -179,6 +229,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index a95702d..0fb8552 100644 --- ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -3,7 +3,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select -- to boolean cast (ctinyint as boolean) @@ -82,7 +82,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. 
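
Every hunk in these q.out updates follows the same recipe: the test query switches from plain `explain` to the new `explain vectorization` (or `explain vectorization detail`) form, and the golden output gains a `PLAN VECTORIZATION` header plus per-vertex `Map Vectorization` / `Reduce Vectorization` summaries. A minimal sketch of exercising the new syntax by hand, assuming a build with this patch applied and reusing the table and predicate from the vectorized_case tests above (the exact plan text is whatever the patched build prints):

    SET hive.vectorized.execution.enabled=true;         -- gates "PLAN VECTORIZATION: enabled: true"
    SET hive.vectorized.execution.reduce.enabled=true;  -- gates "Reduce Vectorization" on reduce vertices
    EXPLAIN VECTORIZATION DETAIL                        -- DETAIL also prints per-operator blocks
                                                        -- (Filter/Select/File Sink Vectorization)
    SELECT csmallint,
           CASE WHEN csmallint = 418   THEN 'a'
                WHEN csmallint = 12205 THEN 'b'
                ELSE 'c' END
    FROM alltypesorc
    WHERE csmallint = 418 OR csmallint = 12205 OR csmallint = 10583;

Without DETAIL, as in this vectorized_casts.q.out hunk, only the plan-level and vertex-level summaries appear. When a vertex reads a non-vectorizable input, the summary instead flips to `enabled: false` with `enabledConditionsNotMet` (for example `hive.vectorized.use.vector.serde.deserialize IS false` over TextInputFormat, or `hive.vectorized.use.row.serde.deserialize IS false` over RCFileInputFormat), exactly as the text- and RC-file tables in the vectorized_bucketmapjoin1.q.out hunks above show.
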
-explain +explain vectorization select -- to boolean cast (ctinyint as boolean) @@ -156,6 +156,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -186,6 +190,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_context.q.out ql/src/test/results/clientpositive/llap/vectorized_context.q.out index 1f70a01..855a50f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_context.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_context.q.out @@ -82,20 +82,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@household_demographics POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,6 +132,14 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -176,6 +188,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -195,6 +215,14 @@ STAGE PLANS: Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index f45e730..251fd51 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -203,7 +203,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -218,7 +218,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -233,20 +233,16 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - ListSink PREHOOK: query: SELECT to_unix_timestamp(fl_time), @@ -419,7 +415,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -434,7 +430,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -449,20 +445,16 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10, _col11 - ListSink PREHOOK: query: SELECT to_unix_timestamp(fl_date), @@ -635,7 +627,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -649,7 +641,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -663,20 +655,16 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - ListSink PREHOOK: query: -- Should all be true or NULL SELECT @@ -849,7 +837,7 @@ true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -858,7 +846,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -867,22 +855,16 @@ POSTHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), 
datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Limit - Number of rows: 10 - ListSink PREHOOK: query: SELECT fl_date, @@ -927,7 +909,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### 2009-07-30 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -935,7 +917,7 @@ PREHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -943,6 +925,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -957,58 +943,37 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: fl_date (type: date) - outputColumnNames: fl_date - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(fl_date), max(fl_date), count(fl_date), count() - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: date) - sort order: + - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT min(fl_date) AS c1, diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index ced9795..0b5d516 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -16,10 +16,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,8 +61,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -88,10 +107,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,8 +149,23 @@ STAGE PLANS: Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -145,6 +183,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2), std(VALUE._col3) diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 76c8404..c2e1dfd 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -34,10 +34,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 11 12 -PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds +PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds +POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds POSTHOOK: type: CREATETABLE_AS_SELECT +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -74,8 +78,19 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -199,11 +214,15 @@ POSTHOOK: Output: default@srcpart_double_hour POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] PREHOOK: query: -- single column, single key -EXPLAIN select count(*) 
from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -234,6 +253,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -269,6 +292,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -290,6 +321,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -329,10 +367,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -363,6 +405,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -383,6 +429,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column 
stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -404,6 +458,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -457,13 +518,17 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -495,6 +560,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -530,6 +599,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -565,6 +642,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -602,6 +687,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: 
false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -645,12 +737,16 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -683,6 +779,10 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 5 Map Operator Tree: TableScan @@ -703,6 +803,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan @@ -723,6 +831,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -760,6 +876,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -815,11 +938,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -849,6 +976,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -899,6 +1030,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -920,6 +1059,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -959,10 +1105,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -993,6 +1143,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: 
llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1013,6 +1167,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1034,6 +1196,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1085,11 +1254,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY POSTHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1120,6 +1293,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1155,6 +1332,14 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1176,6 +1361,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1215,10 +1407,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked 
pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1249,6 +1445,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Map 4 Map Operator Tree: TableScan @@ -1269,6 +1469,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1290,6 +1498,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1339,11 +1554,15 @@ POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 0 PREHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1374,6 +1593,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1409,6 +1632,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1430,6 +1661,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -1469,10 +1707,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_double_hour
 #### A masked pattern was here ####
 1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1503,6 +1745,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1538,6 +1784,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1559,6 +1813,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -1598,10 +1859,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_double_hour
 #### A masked pattern was here ####
 1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1632,6 +1897,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1652,6 +1921,14 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1673,6 +1950,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -1712,10 +1996,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_double_hour
 #### A masked pattern was here ####
 1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1746,6 +2034,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1766,6 +2058,14 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1787,6 +2087,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -1839,10 +2146,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1873,6 +2184,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1908,6 +2223,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1929,6 +2252,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -1983,11 +2313,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 1000
 Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2015,6 +2349,10 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -2035,6 +2373,10 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2 
            Execution mode: llap
            Reduce Operator Tree:
@@ -2056,6 +2398,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2071,6 +2420,13 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string)
@@ -2118,11 +2474,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 1000
 Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2151,6 +2511,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string), _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -2170,6 +2534,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: string), _col2 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2197,6 +2569,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2238,11 +2617,15 @@ POSTHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
 1500
 PREHOOK: query: -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
 PREHOOK: type: QUERY
 POSTHOOK: query: -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2272,6 +2655,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -2322,6 +2709,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2343,6 +2738,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2383,11 +2785,15 @@ POSTHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2418,6 +2824,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -2453,6 +2863,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2474,6 +2892,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2494,10 +2919,14 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2546,6 +2975,14 @@ STAGE PLANS:
                         Target Vertex: Map 4
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -2562,6 +2999,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2583,6 +3024,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2604,11 +3052,15 @@ STAGE PLANS:
         ListSink
 
 PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2638,6 +3090,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -2673,6 +3129,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2694,6 +3158,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2715,13 +3186,17 @@ STAGE PLANS:
         ListSink
 
 PREHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2753,6 +3228,10 @@ STAGE PLANS:
                       value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -2788,6 +3267,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 6 
             Map Operator Tree:
                 TableScan
@@ -2808,6 +3295,14 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2845,6 +3340,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 4 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -2884,12 +3386,16 @@ POSTHOOK: Input: default@srcpart_date
 POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 500
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2925,6 +3431,12 @@ STAGE PLANS:
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: unknown
+            Map Vectorization:
+                enabled: true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -2945,6 +3457,14 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 6 
             Map Operator Tree:
                 TableScan
@@ -2965,6 +3485,14 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -3002,6 +3530,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 4 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -3038,11 +3573,15 @@ POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 0
 PREHOOK: query: -- union + subquery
-EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- union + subquery
-EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3075,6 +3614,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -3095,6 +3638,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 7 
             Map Operator Tree:
                 TableScan
@@ -3115,6 +3662,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -3136,6 +3687,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -3151,6 +3709,13 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0)
@@ -3187,6 +3752,13 @@ STAGE PLANS:
                         Target Vertex: Map 1
         Reducer 8 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0)
@@ -3247,10 +3819,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 2000
-PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+PREHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+POSTHOOK: query: EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3283,6 +3859,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -3303,6 +3883,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 7 
             Map Operator Tree:
                 TableScan
@@ -3323,6 +3907,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -3346,6 +3934,13 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string)
@@ -3361,6 +3956,13 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0)
@@ -3397,6 +3999,13 @@ STAGE PLANS:
                         Target Vertex: Map 1
         Reducer 8 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0)
@@ -3458,10 +4067,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 2008-04-08
 2008-04-09
-PREHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+PREHOOK: query: EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+POSTHOOK: query: EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3496,6 +4109,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 10 
             Map Operator Tree:
                 TableScan
@@ -3516,6 +4133,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -3534,6 +4155,10 @@ STAGE PLANS:
                   Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 7 
             Map Operator Tree:
                 TableScan
@@ -3554,8 +4179,19 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 11 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0)
@@ -3607,6 +4243,13 @@ STAGE PLANS:
                         Target Vertex: Map 5
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string)
@@ -3638,6 +4281,13 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string)
@@ -3651,6 +4301,13 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 8 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0)
@@ -3732,11 +4389,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 2008-04-09
 2008-04-09
 PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3780,6 +4441,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -3815,8 +4480,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -3870,13 +4550,17 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3929,6 +4613,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -3964,6 +4652,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -3999,8 +4695,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4056,11 +4767,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4103,6 +4818,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -4153,8 +4872,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4206,11 +4940,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4254,6 +4992,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -4289,8 +5031,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4331,11 +5088,15 @@ POSTHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
 0
 PREHOOK: query: -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4379,6 +5140,10 @@ STAGE PLANS:
                      value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -4414,8 +5179,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4455,10 +5235,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_double_hour
 #### A masked pattern was here ####
 1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4502,6 +5286,10 @@ STAGE PLANS:
                      value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -4537,8 +5325,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4594,12 +5397,16 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product
 PREHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4626,6 +5433,10 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -4646,8 +5457,19 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string)
@@ -4676,6 +5498,13 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
         Reducer 4 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4724,11 +5553,15 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4772,6 +5605,10 @@ STAGE PLANS:
                      value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -4807,8 +5644,23 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4829,10 +5681,14 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4879,6 +5735,14 @@ STAGE PLANS:
                      value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -4895,8 +5759,19 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
        Reducer 2 
            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4918,11 +5793,15 @@ STAGE PLANS:
         ListSink
 
 PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
@@ -4918,11 +5793,15 @@ STAGE PLANS:
         ListSink

 PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4952,6 +5831,10 @@ STAGE PLANS:
                     Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 2
             Map Operator Tree:
                 TableScan
@@ -4985,8 +5868,23 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 3
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: count(VALUE._col0)
@@ -5008,13 +5906,17 @@ STAGE PLANS:
         ListSink

 PREHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5067,6 +5969,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3
             Map Operator Tree:
                 TableScan
@@ -5102,6 +6008,14 @@ STAGE PLANS:
                         Target Vertex: Map 1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4
             Map Operator Tree:
                 TableScan
@@ -5122,8 +6036,23 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: count(VALUE._col0)
@@ -5163,12 +6092,16 @@ POSTHOOK: Input: default@srcpart_date
 POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 500
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
 where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5203,6 +6136,12 @@ STAGE PLANS:
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: unknown
+            Map Vectorization:
+                enabled: true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Map 2
             Map Operator Tree:
                 TableScan
@@ -5246,6 +6185,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4
             Map Operator Tree:
                 TableScan
@@ -5266,8 +6213,23 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
         Reducer 3
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: count(VALUE._col0)
@@ -5304,11 +6266,15 @@ POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 0
 PREHOOK: query: -- union + subquery
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- union + subquery
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5356,6 +6322,10 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3
             Map Operator Tree:
                 TableScan
@@ -5376,6 +6346,10 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 6
             Map Operator Tree:
                 TableScan
@@ -5396,8 +6370,19 @@ STAGE PLANS:
                       value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     keys: KEY._col0 (type: string)
@@ -5413,6 +6398,13 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 4
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: max(VALUE._col0)
@@ -5449,6 +6441,13 @@ STAGE PLANS:
                         Target Vertex: Map 1
         Reducer 7
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: min(VALUE._col0)
@@ -5547,10 +6546,14 @@ POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(s
 POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: srcpart_orc PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09')
+PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09')
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09')
+POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index 1bab6f7..27210d2 100644
--- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -1,15 +1,19 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -60,6 +64,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
@@ -79,8 +91,21 @@ STAGE PLANS:
                     Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported
+                vectorized: false
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
diff --git ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out
index 0a81f62..3759267 100644
--- ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_math_funcs.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end.
-explain
+explain vectorization
 select
    cdouble
   ,Round(cdouble, 2)
@@ -54,7 +54,7 @@ and sin(cfloat) >= -1.0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end.
-explain
+explain vectorization
 select
    cdouble
   ,Round(cdouble, 2)
@@ -106,6 +106,10 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause
 and sin(cfloat) >= -1.0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
diff --git ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
index 800cbb6..93a68e9 100644
--- ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
@@ -1,11 +1,15 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
-explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
-explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -66,6 +70,14 @@ STAGE PLANS:
                       value expressions: _col0 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3
             Map Operator Tree:
                 TableScan
@@ -86,6 +98,14 @@ STAGE PLANS:
                       value expressions: _col1 (type: smallint), _col2 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4
             Map Operator Tree:
                 TableScan
@@ -105,8 +125,23 @@ STAGE PLANS:
                     Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: sum(VALUE._col0)
diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
index 8345132..6285c99 100644
--- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
@@ -46,16 +46,20 @@ POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldS
 POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
 POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
 POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
-PREHOOK: query: explain select *
+PREHOOK: query: explain vectorization select *
 from alltypes_parquet
 where cint = 528534767
 limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select *
+POSTHOOK: query: explain vectorization select *
 from alltypes_parquet
 where cint = 528534767
 limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -99,7 +103,7 @@ POSTHOOK: Input: default@alltypes_parquet
 528534767	27	-7824	27.0	-7824.0	cvLH6Eat2yFsyy7p
 528534767	-11	-15431	-11.0	-15431.0	cvLH6Eat2yFsyy7p
 528534767	61	-15549	61.0	-15549.0	cvLH6Eat2yFsyy7p
-PREHOOK: query: explain select ctinyint,
+PREHOOK: query: explain vectorization select ctinyint,
   max(cint),
   min(csmallint),
   count(cstring1),
@@ -108,7 +112,7 @@ PREHOOK: query: explain select ctinyint,
 from alltypes_parquet
 group by ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select ctinyint,
+POSTHOOK: query: explain vectorization select ctinyint,
   max(cint),
   min(csmallint),
   count(cstring1),
@@ -117,6 +121,10 @@ POSTHOOK: query: explain select ctinyint,
 from alltypes_parquet
 group by ctinyint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -152,8 +160,17 @@ STAGE PLANS:
                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
         Reducer 2
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported
+                vectorized: false
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index b49d5dd..857465d 100644
--- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -118,12 +118,12 @@ POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet
 POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
 POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
 PREHOOK: query: -- select
-explain
+explain vectorization detail
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types
 PREHOOK: type: QUERY
 POSTHOOK: query: -- select
-explain
+explain vectorization detail
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types
 POSTHOOK: type: QUERY
@@ -169,10 +169,10 @@ POSTHOOK: Input: default@parquet_types
 119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede	68692CCAC0BDE7	12.83
 120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde	B4F3CAFDBEDD	73.04
 121	1	2	1.1	6.3	lmn	2032-10-10 22:22:22.222222222	bcdef	abcde		90.33
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
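[Editor's note] Beyond the bare EXPLAIN VECTORIZATION used so far, the rewritten tests in this file and in vectorized_ptf.q.out below also exercise explain vectorization detail and explain vectorization extended. A hedged continuation of the hypothetical probe sketch above, driving all three forms seen in these golden files (the query and loop are illustrative only):

    // Continuation of the hypothetical ExplainVectorizationProbe: run the
    // three forms of the clause that the golden files in this patch exercise.
    String query = " SELECT cchar, LENGTH(cchar), cdecimal, SIGN(cdecimal) FROM parquet_types";
    for (String mode : new String[] {"", " DETAIL", " EXTENDED"}) {
      driver.run("EXPLAIN VECTORIZATION" + mode + query);
      List<String> lines = new ArrayList<>();
      driver.getResults(lines);
      System.out.println("=== EXPLAIN VECTORIZATION" + mode + " ===");
      for (String line : lines) {
        System.out.println(line);
      }
    }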
@@ -215,7 +215,7 @@ uvwzy	5	abcdede	7	4.76	1
 vwxyz	5	abcdede	7	12.83	1
 wxyza	5	abcde	5	73.04	1
 bcdef	5	abcde	5	90.33	1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
@@ -227,7 +227,7 @@ FROM parquet_types
 GROUP BY ctinyint
 ORDER BY ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 996b893..1638512 100644
--- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchem
 POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ]
 PREHOOK: query: --1. test1
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -132,7 +132,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: --1. test1
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -142,6 +142,10 @@ from noop(on part_orc
 order by p_name
 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -172,6 +176,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -224,6 +236,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -255,6 +272,11 @@ STAGE PLANS:
        Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -377,7 +399,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 2. testJoinWithNoop
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
@@ -386,13 +408,17 @@ sort by j.p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2. testJoinWithNoop
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
 distribute by j.p_mfgr
 sort by j.p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -428,6 +454,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -497,6 +531,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -571,6 +613,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -602,6 +649,11 @@ STAGE PLANS:
         Reducer 4
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -707,7 +759,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	44
 Manufacturer#5	almond azure blanched chiffon midnight	23	-23
 PREHOOK: query: -- 3. testOnlyPTF
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size
 from noop(on part_orc
 partition by p_mfgr
@@ -715,12 +767,16 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 3. testOnlyPTF
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size
 from noop(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -750,6 +806,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -802,6 +866,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -896,7 +965,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46
 Manufacturer#5	almond azure blanched chiffon midnight	23
 PREHOOK: query: -- 4. testPTFAlias
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -908,7 +977,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 4. testPTFAlias
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -918,6 +987,10 @@ from noop(on part_orc
 order by p_name
 ) abc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -948,6 +1021,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -1000,6 +1081,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -1031,6 +1117,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -1153,7 +1244,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1165,7 +1256,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1175,6 +1266,10 @@ from noop(on part_orc
 order by p_name
 )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1205,6 +1300,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -1257,6 +1360,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -1288,6 +1396,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -1411,7 +1524,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	46	44
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	23	-23
 PREHOOK: query: -- 6. testSWQAndPTFAndGBy
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1424,7 +1537,7 @@ group by p_mfgr, p_name, p_size
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 6. testSWQAndPTFAndGBy
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1435,6 +1548,10 @@ from noop(on part_orc
 )
 group by p_mfgr, p_name, p_size
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1465,6 +1582,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -1517,6 +1642,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -1556,6 +1686,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Group By Operator
                     keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
@@ -1681,7 +1816,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	46	44
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	23	-23
 PREHOOK: query: -- 7. testJoin
-explain extended
+explain vectorization extended
 select abc.*
 from noop(on part_orc
 partition by p_mfgr
@@ -1690,13 +1825,17 @@ order by p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 7. testJoin
-explain extended
+explain vectorization extended
 select abc.*
 from noop(on part_orc
 partition by p_mfgr
 order by p_name
 ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1727,6 +1866,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -1796,6 +1943,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -1848,6 +2003,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
@@ -1968,7 +2128,7 @@ POSTHOOK: Input: default@part_orc
 90681	almond antique chartreuse khaki white	Manufacturer#3	Brand#31	MEDIUM BURNISHED TIN	17	SM CASE	1671.68	are slyly after the sl
 PREHOOK: query: -- 8. testJoinRight
-explain extended
+explain vectorization extended
 select abc.*
 from part_orc p1 join noop(on part_orc
 partition by p_mfgr
@@ -1977,13 +2137,17 @@ order by p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 8. testJoinRight
-explain extended
+explain vectorization extended
 select abc.*
 from part_orc p1 join noop(on part_orc
 partition by p_mfgr
 order by p_name
 ) abc on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2017,6 +2181,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2083,6 +2255,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2174,6 +2354,11 @@ STAGE PLANS:
         Reducer 4
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
@@ -2259,7 +2444,7 @@ POSTHOOK: Input: default@part_orc
 90681	almond antique chartreuse khaki white	Manufacturer#3	Brand#31	MEDIUM BURNISHED TIN	17	SM CASE	1671.68	are slyly after the sl
 PREHOOK: query: -- 9. testNoopWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r
 from noopwithmap(on part_orc
@@ -2268,13 +2453,17 @@ order by p_name, p_size desc)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 9. testNoopWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r
 from noopwithmap(on part_orc
 partition by p_mfgr
 order by p_name, p_size desc)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2320,6 +2509,12 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Unknown
+                vectorized: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2372,6 +2567,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
@@ -2403,6 +2603,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
@@ -2506,7 +2711,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4
 Manufacturer#5	almond azure blanched chiffon midnight	23	5
 PREHOOK: query: -- 10. testNoopWithMapWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2517,7 +2722,7 @@ from noopwithmap(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 10. testNoopWithMapWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2526,6 +2731,10 @@ from noopwithmap(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2572,6 +2781,12 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Unknown
+                vectorized: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2624,6 +2839,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2656,6 +2876,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2776,7 +3001,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2787,7 +3012,7 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -2796,6 +3021,10 @@ from noop(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2826,6 +3055,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2878,6 +3115,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2909,6 +3151,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3029,7 +3276,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 12. testFunctionChain
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -3041,7 +3288,7 @@ order by p_mfgr, p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 12. testFunctionChain
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -3051,6 +3298,10 @@ partition by p_mfgr
 order by p_mfgr, p_name
 )))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3082,6 +3333,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3134,6 +3393,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3188,6 +3452,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3227,6 +3496,11 @@ STAGE PLANS:
         Reducer 4
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3349,7 +3623,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
 Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 sub1.cd, sub1.s1
 from (select p_mfgr, p_name,
@@ -3364,7 +3638,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 sub1.cd, sub1.s1
 from (select p_mfgr, p_name,
@@ -3377,6 +3651,10 @@ order by p_name)
 window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
 ) sub1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3407,6 +3685,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3459,6 +3745,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3490,6 +3781,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3610,7 +3906,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	4	5882.970000000001
 Manufacturer#5	almond azure blanched chiffon midnight	5	4271.3099999999995
 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount
-explain extended
+explain vectorization extended
 select abc.p_mfgr, abc.p_name,
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
 dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
@@ -3624,7 +3920,7 @@ order by p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount
-explain extended
+explain vectorization extended
 select abc.p_mfgr, abc.p_name,
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
 dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
@@ -3636,6 +3932,10 @@ partition by p_mfgr
 order by p_name
 ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3667,6 +3967,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3736,6 +4044,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3788,6 +4104,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3845,6 +4166,11 @@ STAGE PLANS:
         Reducer 4
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3986,7 +4312,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	4	4	4	1018.1	6208.18	46
 Manufacturer#5	almond azure blanched chiffon midnight	5	5	5	1464.48	7672.66	23	-23
 PREHOOK: query: -- 15. testDistinctInSelectWithPTF
-explain extended
+explain vectorization extended
 select DISTINCT p_mfgr, p_name, p_size
 from noop(on part_orc
 partition by p_mfgr
@@ -3994,12 +4320,16 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 15. testDistinctInSelectWithPTF
-explain extended
+explain vectorization extended
 select DISTINCT p_mfgr, p_name, p_size
 from noop(on part_orc
 partition by p_mfgr
 order by p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4030,6 +4360,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -4082,6 +4420,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -4121,6 +4464,13 @@ STAGE PLANS:
         Reducer 3
             Execution mode: vectorized, llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
                 Group By Operator
                     keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
@@ -4215,7 +4565,7 @@ POSTHOOK: type: CREATEVIEW
 POSTHOOK: Input: default@part_orc
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@mfgr_price_view
-PREHOOK: query: explain extended
+PREHOOK: query: explain vectorization extended
 select p_mfgr, p_brand, s,
 sum(s) over w1 as s1
 from noop(on mfgr_price_view
@@ -4223,7 +4573,7 @@ partition by p_mfgr
 order by p_mfgr)
 window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: explain vectorization extended
 select p_mfgr, p_brand, s,
 sum(s) over w1 as s1
 from noop(on mfgr_price_view
@@ -4231,6 +4581,10 @@ partition by p_mfgr
 order by p_mfgr)
 window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4271,6 +4625,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -4323,6 +4685,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Group By Operator
                     aggregations: sum(VALUE._col0)
@@ -4356,6 +4723,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
@@ -4496,7 +4868,7 @@ fv1 INT)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@part_5
-PREHOOK: query: explain extended
+PREHOOK: query: explain vectorization extended
 from noop(on part_orc
 partition by p_mfgr
 order by p_name)
@@ -4512,7 +4884,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
 first_value(p_size, true) over w1 as fv1
 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: explain vectorization extended
 from noop(on part_orc
 partition by p_mfgr
 order by p_name)
@@ -4528,6 +4900,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
 first_value(p_size, true) over w1 as fv1
 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
@@ -4564,6 +4940,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -4616,6 +5000,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -4656,6 +5045,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -4733,6 +5127,11 @@ STAGE PLANS:
         Reducer 4
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
@@ -4774,6 +5173,11 @@ STAGE PLANS:
        Reducer 5
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
@@ -5041,7 +5445,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	46	46	4	4	0.8	6
 Manufacturer#5	almond azure blanched chiffon midnight	23	23	5	5	1.0	2
 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 rank() over (partition by p_mfgr,p_name) as r,
 dense_rank() over (partition by p_mfgr,p_name) as dr,
@@ -5060,7 +5464,7 @@ from noop(on
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 rank() over (partition by p_mfgr,p_name) as r,
 dense_rank() over (partition by p_mfgr,p_name) as dr,
@@ -5077,6 +5481,10 @@ from noop(on
 partition by p_mfgr,p_name
 order by p_mfgr,p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -5108,6 +5516,14 @@ STAGE PLANS:
                 auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -5160,6 +5576,11 @@ STAGE PLANS:
         Reducer 2
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
@@ -5221,6 +5642,11 @@ STAGE PLANS:
         Reducer 3
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -5260,6 +5686,11 @@ STAGE PLANS:
         Reducer 4
             Execution mode: llap
             Needs Tagging: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
             Reduce Operator Tree:
                 Select Operator
                     expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -5396,7 +5827,7 @@ Manufacturer#5	almond aquamarine dodger light gainsboro	1	1	46	46
 Manufacturer#5	almond azure blanched chiffon midnight	1	1	23	23
 PREHOOK: query: -- 19.
testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5415,7 +5846,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5432,6 +5863,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5464,6 +5899,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5516,6 +5959,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5554,6 +6002,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5585,6 +6038,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5616,6 +6074,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5752,7 +6215,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5769,7 +6232,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 20. 
testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5784,6 +6247,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5815,6 +6282,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5867,6 +6342,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -5905,6 +6385,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -5943,6 +6428,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6075,7 +6565,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6094,7 +6584,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 21. 
testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6111,6 +6601,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6143,6 +6637,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6195,6 +6697,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6233,6 +6740,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6280,6 +6792,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6312,6 +6829,11 @@ STAGE PLANS: Reducer 5 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6448,7 +6970,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6466,7 +6988,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6482,6 +7004,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6513,6 +7039,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6565,6 +7099,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6619,6 +7158,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -6658,6 +7202,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6792,7 +7341,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6808,7 +7357,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6822,6 +7371,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6853,6 +7406,14 @@ STAGE PLANS: auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6905,6 +7466,11 @@ STAGE PLANS: Reducer 2 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6959,6 +7525,11 @@ STAGE PLANS: Reducer 3 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) @@ -6991,6 +7562,11 @@ STAGE PLANS: Reducer 4 Execution mode: llap Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index e376ca1..ffcf9de 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -43,6 +47,14 @@ STAGE PLANS: Statistics: Num rows: 
9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -62,6 +74,14 @@ STAGE PLANS: Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -88,6 +108,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) @@ -101,6 +126,13 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out index bfac939..564c6e9 100644 --- ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -24,7 +24,7 @@ and cstring1 like '%' PREHOOK: type: QUERY POSTHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. 
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -46,6 +46,10 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index ceaac4f..e4defd2 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -19,10 +19,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test POSTHOOK: type: QUERY Plan optimized by CBO. @@ -48,10 +48,10 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY Plan optimized by CBO. @@ -87,10 +87,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test POSTHOOK: type: QUERY Plan optimized by CBO. @@ -116,10 +116,10 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY Plan optimized by CBO. 
diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 4092911..fc728b8 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,8 +131,23 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) @@ -218,7 +237,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -231,7 +250,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -244,6 +263,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -272,8 +295,23 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) @@ -363,7 +401,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -376,7 +414,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -389,6 +427,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -417,8 +459,23 @@ STAGE PLANS: value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) @@ -511,7 +568,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -525,7 +582,7 @@ FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY POSTHOOK: query: -- Wrong format. Should all be NULL. 
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -538,6 +595,10 @@ EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -566,8 +627,23 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) @@ -620,20 +696,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -666,8 +746,23 @@ STAGE PLANS: value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -708,15 +803,19 @@ POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... 
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -749,8 +848,21 @@ STAGE PLANS: value expressions: _col0 (type: double) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) + vectorized: false Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -788,7 +900,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -799,7 +911,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -810,6 +922,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -842,8 +958,21 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group 
By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out index 1e74446..bcddc30 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -16,7 +16,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -34,6 +34,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -115,7 +119,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -133,7 +137,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -151,6 +155,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index fbb43c4..9d9e5d3 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,47 +32,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdate (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: date) - sort order: + - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), 
CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,51 +74,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 @@ -138,47 +116,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdecimal1 (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(20,10)) - sort order: + - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -191,51 +158,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -248,47 +200,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdate (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: date) - sort order: + - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column 
stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -301,47 +242,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdate (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: date) - sort order: + - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink 
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -354,47 +284,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdecimal1 (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(20,10)) - sort order: + - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -407,46 +326,27 @@ STAGE PLANS: #### A 
masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY @@ -695,12 +595,16 @@ POSTHOOK: Input: default@decimal_date_test 6172 PREHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY POSTHOOK: query: -- projections -EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -713,54 +617,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) - outputColumnNames: _col0 - Statistics: 
Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -773,54 +659,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition 
columns: _col0 (type: boolean) - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -833,54 +701,36 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -893,49 +743,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data 
size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 424a2c9..0448566 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -124,65 +128,33 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator 
Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reducer 3 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT i, diff --git ql/src/test/results/clientpositive/spark/vector_char_4.q.out ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 3e551bb..efb6491 100644 --- ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,9 +149,20 @@ STAGE PLANS: Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select 
Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToChar[13:Char], CastLongToChar[14:Char], CastLongToChar[15:Char], CastLongToChar[16:Char], VectorUDFAdaptor[17:char(20)], VectorUDFAdaptor[18:char(20)], CastStringGroupToChar[19:Char] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -155,6 +170,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 73272fb..d96050b 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ] PREHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales PREHOOK: type: QUERY POSTHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1259,8 +1263,21 @@ STAGE PLANS: Select Operator expressions: ws_order_number (type: int) outputColumnNames: ws_order_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[16:int] + vectorized: true Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[16:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 @@ -1269,35 +1286,95 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS 
true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index dbaf14d..99f8bf9 100644 --- 
ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -113,42 +117,10 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) - sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: 20 - Processor Tree: - ListSink PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin 
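[Editor's illustration — not part of the patch.] The vector_data_types.q.out hunk above shows the other side of the new header: when vectorization is off, EXPLAIN VECTORIZATION prints "enabled: false" together with the precondition that failed, instead of listing vectorization details per operator. The output strings follow a uniform "<condition> IS <true|false>" pattern, bucketed into a met list and a not-met list. A minimal, self-contained sketch of that bookkeeping pattern — the class and method names (ConditionLog, check) are invented for illustration and are not Hive's actual implementation:

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of the met/not-met condition reporting visible in the
// EXPLAIN VECTORIZATION golden output above; names are illustrative only.
public class ConditionLog {
  private final List<String> met = new ArrayList<>();
  private final List<String> notMet = new ArrayList<>();

  // Record a named boolean precondition; e.g.
  // check("hive.vectorized.execution.enabled", false)
  // is rendered as "hive.vectorized.execution.enabled IS false".
  public boolean check(String description, boolean value) {
    (value ? met : notMet).add(description + " IS " + value);
    return value;
  }

  public List<String> conditionsMet() { return met; }
  public List<String> conditionsNotMet() { return notMet; }

  public static void main(String[] args) {
    ConditionLog log = new ConditionLog();
    boolean enabled = log.check("hive.vectorized.execution.enabled", false);
    if (!enabled) {
      // Mirrors the shape of the golden output:
      //   enabled: false
      //   enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
      System.out.println("enabled: false");
      System.out.println("enabledConditionsNotMet: " + log.conditionsNotMet());
    }
  }
}

Keeping each check's rendered string alongside its boolean is what lets the explain output report exactly which gate (plan-, map-, or reduce-level) blocked vectorization.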
FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY @@ -189,10 +161,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -17045922556 -PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -205,44 +181,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) - sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 20 - Processor Tree: - ListSink PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index cfdfce1..3807698 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem PREHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby @@ -33,13 +33,17 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -52,56 +56,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - outputColumnNames: cint, cdecimal1, cdecimal2 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() - keys: cint (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), 
count(VALUE._col8) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -130,7 +105,7 @@ POSTHOOK: Input: default@decimal_vgby 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 PREHOOK: query: -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby @@ -138,13 +113,17 @@ EXPLAIN SELECT cint, HAVING COUNT(*) > 1 PREHOOK: type: QUERY POSTHOOK: query: -- Now add the others... 
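[Editor's illustration — not part of the patch.] The two decimal_vgby hunks differ in an instructive way: the first query's aggregates (COUNT/MAX/MIN/SUM) carry primitive partial results, so both map and reduce sides report vectorized: true, while the AVG/STDDEV variants in the hunk that follows make the reducer fall back ("notVectorizedReason: Aggregation Function UDF avg parameter expression ... Data type struct ... not supported") because their partial state is shipped as a struct. A toy, self-contained example — explicitly not Hive's code — of why a primitive-partial aggregate vectorizes so readily: the inner loop is a straight pass over a flat column array plus a parallel null mask:

// Toy example (not Hive's implementation) of a vectorized SUM over one
// column batch: values live in a flat array, nulls in a parallel mask.
public class VectorSumDemo {
  static long sumBatch(long[] col, boolean[] isNull, int size, long acc) {
    for (int i = 0; i < size; i++) {
      if (!isNull[i]) {
        acc += col[i];   // partial state is a single primitive long
      }
    }
    return acc;
  }

  public static void main(String[] args) {
    long[] col = {5, 7, 0, 3};
    boolean[] isNull = {false, false, true, false};
    // A primitive like this long travels cheaply between map and reduce;
    // AVG's (sum, count) pair is serialized as a struct instead, which the
    // reducer in the next hunk reports as not vectorizable.
    System.out.println(sumBatch(col, isNull, 4, 0L)); // prints 15
  }
}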
-EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -157,55 +136,24 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - outputColumnNames: cint, cdecimal1, cdecimal2 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() - keys: cint (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: 
decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), diff --git ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index 0493994..7f3f2c2 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -72,12 +72,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -94,17 +98,42 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[1:long]) + vectorized: true predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(4,0)] + vectorized: true Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) Execution mode: vectorized + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -118,11 +147,23 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[1:long]) + vectorized: true predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(4,2)] + vectorized: true Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -130,18 +171,38 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + vectorized: true outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 803a53b..5c40f35 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain 
vectorization detail select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -130,8 +134,21 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -140,12 +157,40 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -153,9 +198,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1000 Data size: 
459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_elt.q.out ql/src/test/results/clientpositive/spark/vector_elt.q.out index bb66867..51cc5ac 100644 --- ql/src/test/results/clientpositive/spark/vector_elt.q.out +++ ql/src/test/results/clientpositive/spark/vector_elt.q.out @@ -1,29 +1,21 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (ctinyint > 0) (type: boolean) - Select Operator - expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Limit - Number of rows: 10 - ListSink PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc @@ -47,7 +39,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -60,7 +52,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -73,6 +65,10 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index e13c311..2811b98 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s, t, max(b) from vectortab2korc group by 
s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -130,9 +134,23 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -141,14 +159,43 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[2:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -156,9 +203,20 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[0:tinyint], IdentityExpression[2:bigint] + vectorized: true Statistics: 
Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 511bd79..1839a8b 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -49,22 +53,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -73,47 +70,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -126,12 +96,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -143,27 +117,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -172,43 +134,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY @@ -257,12 +196,16 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -274,22 +217,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -298,47 +234,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -351,12 +260,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -368,22 +281,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -392,43 +298,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -441,12 +324,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -458,22 +345,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -482,47 +362,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -535,12 +388,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -552,22 +409,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -576,47 +426,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -629,12 +452,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -646,22 +473,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -670,47 +490,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -723,12 +516,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -740,22 +537,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -764,47 +554,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -817,12 +580,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -834,22 +601,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -858,47 +618,20 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index c08fbda..4bff72c 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -27,35 +31,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 3 - Map Operator Tree: - TableScan - alias: cd - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 - Map Operator Tree: - TableScan - alias: hd - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -66,66 +62,29 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column 
stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - input vertices: - 1 Map 4 - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select count(*) from (select c.ctinyint from alltypesorc c diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 012c3eb..7135335 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -55,6 +59,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 3 @@ -75,17 +83,40 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -137,6 +168,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work @@ -171,19 +206,23 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -217,6 +256,10 @@ STAGE PLANS: keys: 0 _col0 (type: int), 1 (type: int) 1 _col0 (type: int), _col1 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat 
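The "Map Vectorization: enabled: false" entries above show the gating condition for non-columnar inputs: lineitem is stored as TextInputFormat, and a map task over it is only considered for vectorization when rows can be deserialized straight into vectorized batches. A minimal HiveQL sketch of how one might probe both states, assuming the same text-format lineitem table this test uses (the SET statements are illustrative assumptions, not part of the golden file):

    -- While hive.vectorized.use.vector.serde.deserialize is false, text-format
    -- map work should report enabled: false with the condition shown above.
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.use.vector.serde.deserialize=false;
    EXPLAIN VECTORIZATION DETAIL
    SELECT l_partkey, l_suppkey FROM lineitem WHERE l_linenumber = 1;

    -- With row deserialization switched on, the same map work should become
    -- eligible to vectorize despite the TextInputFormat input.
    SET hive.vectorized.use.vector.serde.deserialize=true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT l_partkey, l_suppkey FROM lineitem WHERE l_linenumber = 1;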
Local Work: Map Reduce Local Work Map 3 @@ -237,17 +280,40 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -299,6 +365,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index 4710a73..b21b149 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,9 +131,23 @@ STAGE PLANS: Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[7:boolean], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) 
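Just below, the vector_orderby_5 detail output names the concrete aggregator chosen for max(b) (VectorUDAFMaxLong) and attaches a Reduce Vectorization block to each reducer, gated by hive.vectorized.execution.reduce.enabled. A small sketch of how one might watch the reduce side fall back to row mode, assuming the same vectortab2korc table (the SET statement is an illustrative assumption, not part of the golden file):

    -- Disabling reduce-side vectorization should flip the Reducer blocks to
    -- enabled: false while the vectorized map work is unaffected.
    SET hive.vectorized.execution.reduce.enabled=false;
    EXPLAIN VECTORIZATION SUMMARY
    SELECT bo, MAX(b) FROM vectortab2korc GROUP BY bo ORDER BY bo DESC;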
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(IdentityExpression[3:bigint])
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: IdentityExpression[7:boolean]
+                    native: false
+                    nativeConditionsNotMet: Supported IS false
+                    vectorized: true
                 keys: bo (type: boolean)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -138,14 +156,43 @@ STAGE PLANS:
                   key expressions: _col0 (type: boolean)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: boolean)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
+                      vectorized: true
                   Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(IdentityExpression[1:bigint])
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: IdentityExpression[0:boolean]
+                    native: false
+                    nativeConditionsNotMet: Supported IS false
+                    vectorized: true
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -153,17 +200,41 @@ STAGE PLANS:
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean)
                     sort order: -
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: Uniform Hash IS false
+                        vectorized: true
                     Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    nativeConditionsMet: Supported IS true
+                    selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:bigint]
+                    vectorized: true
                 Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                      nativeConditionsNotMet: Supported IS false
+                      vectorized: true
                   Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
index b311c49..c814678 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
@@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2
 4 FOUR NULL NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -75,65 +79,10 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: t2
-                  Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: c (type: int), v2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-1
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: v1 (type: string), a (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink

 PREHOOK: query: -- SORT_QUERY_RESULTS
@@ -155,12 +104,16 @@ one 1 NULL NULL
 one 1 NULL NULL
 three 3 3 THREE
 two 2 2 TWO
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -168,65 +121,10 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: v1 (type: string), a (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-1
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: t2
-                  Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: c (type: int), v2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Right Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      input vertices:
-                        0 Map 1
-                      Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink

 PREHOOK: query: -- SORT_QUERY_RESULTS
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
index 6b89fb3..a0ff0ec 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
@@ -216,18 +216,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only
 select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only
 select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -235,65 +239,10 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col2 (type: int)
-                        1 _col2 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-1
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col2 (type: int)
-                        1 _col2 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink

 PREHOOK: query: -- SORT_QUERY_RESULTS
@@ -332,18 +281,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only
 select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only
 select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -351,65 +304,10 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-1
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink

 PREHOOK: query: -- SORT_QUERY_RESULTS
@@ -534,7 +432,7 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -543,7 +441,7 @@ left outer join small_alltypesorc_a hd
 on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only
 select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -552,6 +450,10 @@ left outer join small_alltypesorc_a hd
 on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -559,110 +461,10 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: tinyint)
-                          1 _col0 (type: tinyint)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 17 Data size: 4843 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count(), sum(_col0)
-                          mode: hash
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint), _col1 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0), sum(VALUE._col1)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink

 PREHOOK: query: -- SORT_QUERY_RESULTS
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
index 113c7d0..9b14552 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
@@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
 on hd.cbigint = c.cbigint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only
 select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -244,6 +244,10 @@ left outer join small_alltypesorc_a hd
 on hd.cbigint = c.cbigint
 ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -251,110 +255,10 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cbigint (type: bigint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: bigint)
-                        1 _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work

   Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cbigint (type: bigint)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col1 (type: bigint)
-                          1 _col0 (type: bigint)
-                        outputColumnNames: _col1
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6336 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count(), sum(_col1)
-                          mode: hash
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint), _col1 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0), sum(VALUE._col1)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink

 PREHOOK: query: -- SORT_QUERY_RESULTS
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
index c5a8de5..97c4cb0 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
@@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -244,117 +244,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring1 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: string)
-                        1 _col0 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cstring1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col1
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.cstring1
@@ -380,7 +270,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
#### A masked pattern was here ####
 20
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -389,7 +279,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -398,117 +288,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring2 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: string)
-                        1 _col0 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring1 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring1 (type: string), cstring2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: string)
-                        1 _col0 (type: string)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.cstring1
@@ -534,7 +314,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
#### A masked pattern was here ####
 28
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -543,7 +323,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1 and hd.cint = c.cint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd
@@ -552,117 +332,7 @@ left outer join small_alltypesorc_a hd
 on hd.cstring1 = c.cstring1 and hd.cint = c.cint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cbigint (type: bigint), cstring2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: bigint), _col3 (type: string)
-                        1 _col0 (type: bigint), _col1 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cstring1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int), _col2 (type: string)
-                        1 _col0 (type: int), _col1 (type: string)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: bigint), _col3 (type: string)
-                        1 _col0 (type: bigint), _col1 (type: string)
-                      outputColumnNames: _col0, _col2
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: int), _col2 (type: string)
-                          1 _col0 (type: int), _col1 (type: string)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.cstring1
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
index 94860ab..56bad6b 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
@@ -246,85 +246,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col2 (type: int)
-                        1 _col2 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col2 (type: int)
-                        1 _col2 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select *
@@ -397,85 +331,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
 NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select c.ctinyint
@@ -904,7 +772,7 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd
@@ -913,7 +781,7 @@ left outer join small_alltypesorc_b hd
 on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd
@@ -922,117 +790,7 @@ left outer join small_alltypesorc_b hd
 on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: cd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: hd
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 _col0 (type: int)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: tinyint)
-                          1 _col0 (type: tinyint)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 36 Data size: 5307 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: -- SORT_QUERY_RESULTS
 select count(*) from (select c.ctinyint
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index bd9b852..8b5274a 100644
--- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -66,105 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
#### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: st
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st
@@ -184,111 +100,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
#### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      filter predicates:
-                        0 {(_col1 = 2)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col1 = 2)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm
@@ -308,111 +134,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
#### A masked pattern was here ####
 6058
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      filter predicates:
-                        0 {((UDFToInteger(_col0) pmod 4) = _col1)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {((UDFToInteger(_col0) pmod 4) = _col1)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm
@@ -432,111 +168,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
#### A masked pattern was here ####
 6248
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      filter predicates:
-                        0 {(_col0 < 100)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      filter predicates:
-                        0 {(_col0 < 100)}
-                        1 
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm
@@ -556,7 +202,7 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
#### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm
@@ -565,7 +211,7 @@ left outer join sorted_mod_4 s2
 on s2.ctinyint = s.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm
@@ -574,117 +220,7 @@ left outer join sorted_mod_4 s2
 on s2.ctinyint = s.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: sm
-                  Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cbigint (type: bigint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 380 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 UDFToLong(_col1) (type: bigint)
-                        1 (_col0 pmod 8) (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: s2
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: tinyint)
-                        1 _col0 (type: tinyint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cmodint (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6058 Data size: 2027 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 UDFToLong(_col1) (type: bigint)
-                        1 (_col0 pmod 8) (type: bigint)
-                      outputColumnNames: _col0
-                      input vertices:
-                        1 Map 3
-                      Statistics: Num rows: 6663 Data size: 2229 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join0 to 1
-                        keys:
-                          0 _col0 (type: tinyint)
-                          1 _col0 (type: tinyint)
-                        input vertices:
-                          1 Map 4
-                        Statistics: Num rows: 7329 Data size: 2451 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}}
 PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm
@@ -770,105 +306,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table2
#### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization only formatted
 select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization only formatted
 select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: st
-                  Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int)
-                        1 _col0 (type: int)
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cmodtinyint (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0
to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -888,111 +340,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter 
predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1012,111 +374,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 
Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1136,111 +408,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: 
_col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1260,7 +442,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1269,7 +451,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1278,117 +460,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column 
stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 7329 Data size: 3335 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-2":{"ROOT STAGE":"TRUE"},"Stage-1":{"DEPENDENT STAGES":"Stage-2"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-2":{},"Stage-1":{},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 5497426..5b69aa8 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -97,32 +97,26 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS 
`none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 20 - Processor Tree: - TableScan - alias: over1korc - Select Operator - expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 20 - ListSink PREHOOK: query: SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, @@ -265,20 +259,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -299,8 +297,21 @@ STAGE PLANS: Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringGroupConcatColCol[19:String_Family](StringGroupColConcatStringScalar[17:String_Family](StringScalarConcatStringGroupCol[18:String_Family](CastLongToString[17:String](CastDoubleToLong[13:long](DoubleColAddDoubleScalar[15:double](DoubleColDivideDoubleScalar[16:double](CastLongToDouble[15:double](LongColSubtractLongScalar[14:long](VectorUDFMonthDate[13:long])))))))) CastLongToString[18:String](VectorUDFYearDate[13:long])) + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[19:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: 
string) mode: hash outputColumnNames: _col0 @@ -309,13 +320,41 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -323,20 +362,49 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out index 9a46ee1..c78bd62 100644 --- ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,9 +149,20 @@ STAGE PLANS: Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToVarChar[13:VarChar], CastLongToVarChar[14:VarChar], CastLongToVarChar[15:VarChar], CastLongToVarChar[16:VarChar], VectorUDFAdaptor[17:varchar(20)], VectorUDFAdaptor[18:varchar(20)], CastStringGroupToVarChar[19:VarChar] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -155,6 +170,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 22fe7cd..86f35a6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -1,7 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -12,7 +12,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
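These hunks replace plain EXPLAIN golden output with the new EXPLAIN VECTORIZATION variants. A minimal sketch of the three forms the files above and below exercise, using the alltypesorc table from vectorization_0.q.out (the statements are illustrative recombinations, not copied from any single test):

  -- Plan-level and vertex-level summaries only: the PLAN VECTORIZATION header
  -- plus Map Vectorization / Reduce Vectorization blocks.
  EXPLAIN VECTORIZATION
  SELECT AVG(ctinyint) AS c1 FROM alltypesorc ORDER BY c1;

  -- DETAIL adds per-operator and per-expression annotations such as
  -- Select Vectorization, Group By Vectorization, and Reduce Sink Vectorization.
  EXPLAIN VECTORIZATION DETAIL
  SELECT MIN(ctinyint) AS c1 FROM alltypesorc ORDER BY c1;

  -- ONLY FORMATTED suppresses the non-vectorization parts of the plan and
  -- emits the compact JSON seen in the vectorized_outer_join golden files.
  EXPLAIN VECTORIZATION ONLY FORMATTED
  SELECT COUNT(*) FROM alltypesorc;
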
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,39 +44,104 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFCount(IdentityExpression[0:tinyint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFMaxLong(IdentityExpression[1:tinyint]), VectorUDAFCountMerge(IdentityExpression[2:bigint]), VectorUDAFCountMerge(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, 
spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:tinyint], IdentityExpression[2:bigint], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -104,16 +173,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -134,38 +207,103 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -191,7 +329,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39856 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -204,7 +342,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -217,6 +355,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -230,56 +372,33 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num 
rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reducer 3 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(ctinyint) as c1, @@ -310,7 +429,7 @@ POSTHOOK: type: 
QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -318,7 +437,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -326,6 +445,10 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -346,39 +469,104 @@ STAGE PLANS: Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[3:bigint]), VectorUDAFMaxLong(IdentityExpression[3:bigint]), VectorUDAFCount(IdentityExpression[3:bigint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:bigint]), VectorUDAFMaxLong(IdentityExpression[1:bigint]), VectorUDAFCountMerge(IdentityExpression[2:bigint]), VectorUDAFCountMerge(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + 
nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint], IdentityExpression[1:bigint], IdentityExpression[2:bigint], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -410,16 +598,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -440,38 +632,103 @@ STAGE PLANS: Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cbigint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink 
Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -497,7 +754,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1698460028409 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -510,7 +767,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: 
EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -523,6 +780,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -536,56 +797,33 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reducer 3 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(cbigint) as c1, @@ -616,7 +854,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -624,7 +862,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -632,6 +870,10 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -652,39 +894,104 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(IdentityExpression[4:float]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFCount(IdentityExpression[4:float]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(IdentityExpression[0:float]), VectorUDAFMaxDouble(IdentityExpression[1:float]), VectorUDAFCountMerge(IdentityExpression[2:bigint]), VectorUDAFCountMerge(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:float], IdentityExpression[1:float], IdentityExpression[2:bigint], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -716,16 +1023,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -746,38 +1057,103 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true 
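(Editor's aside: the Group By Vectorization annotation just below binds sum(cfloat) to VectorUDAFSumDouble over the batch column selected by IdentityExpression[4:float]. As a rough illustration of what such a batch-at-a-time aggregator does — a minimal, self-contained sketch under assumed simplifications, not the generated Hive class; the real code is produced from Hive's VectorUDAFSum template and operates on VectorizedRowBatch/DoubleColumnVector with repeating-value and per-group aggregation-buffer handling that is omitted here:

// Simplified, hypothetical stand-in for a vectorized SUM over a double column.
// Plain Java types only; all names here are illustrative, not Hive's.
final class SumDoubleSketch {
  private double sum;      // running aggregate for a single (global) group
  private boolean nonNull; // SUM over all-NULL input must remain NULL

  // vector: the batch's column values; isNull: per-row null flags;
  // selected/size: the rows of this batch that survived earlier filters.
  void aggregateBatch(double[] vector, boolean[] isNull, int[] selected, int size) {
    for (int i = 0; i < size; i++) {
      int row = selected[i];
      if (!isNull[row]) {
        sum += vector[row];
        nonNull = true;
      }
    }
  }

  Double result() {
    return nonNull ? sum : null;
  }
}

Consuming a whole column array per call, instead of routing each row through an ObjectInspector, is the behavior that the vectorized: true annotations in these plans certify.)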
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(IdentityExpression[0:double]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + 
vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -803,7 +1179,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39479.635992884636 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -816,7 +1192,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -829,6 +1205,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -842,56 +1222,33 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reducer 3 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 
(type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(cfloat) as c1, @@ -923,7 +1280,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -950,7 +1307,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -977,6 +1334,10 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -994,14 +1355,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterStringColLikeStringScalar[-1:boolean] FilterDecimalScalarNotEqualDecimalColumn[-1:boolean](CastLongToDecimal[12:decimal(13,3)]) FilterDoubleColLessDoubleColumn[-1:boolean](CastLongToDouble[13:double]) FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterLongColEqualLongScalar[-1:boolean] FilterLongScalarEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]))) + vectorized: true predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint], IdentityExpression[4:float], IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group 
By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(IdentityExpression[3:bigint]), VectorUDAFStdPopLong(IdentityExpression[3:bigint]), VectorUDAFVarSampLong(IdentityExpression[3:bigint]), VectorUDAFCountStar, VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE @@ -1010,7 +1391,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index 6d5c9d9..e207cbf 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -35,7 +35,7 @@ LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -86,14 +90,35 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: 
FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean] FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean] FilterLongColNotEqualLongScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(11,4)]))) + vectorized: true predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFStdPopDouble(IdentityExpression[4:float]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -105,7 +130,20 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression 
for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -124,16 +162,39 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:tinyint], IdentityExpression[2:timestamp], IdentityExpression[3:float], IdentityExpression[4:string], IdentityExpression[5:tinyint], IdentityExpression[6:tinyint], IdentityExpression[7:tinyint], IdentityExpression[8:double], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:float], IdentityExpression[12:double], IdentityExpression[10:double], IdentityExpression[14:double], IdentityExpression[15:decimal(7,3)], IdentityExpression[16:double], IdentityExpression[17:double], IdentityExpression[18:float], IdentityExpression[19:double], IdentityExpression[20:tinyint] + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -255,7 +316,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, 
ctimestamp1, @@ -289,7 +350,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -322,6 +383,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -340,14 +405,35 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean] FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean] FilterLongColNotEqualLongScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(11,4)]))) + vectorized: true predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFStdPopDouble(IdentityExpression[4:float]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: cboolean1 (type: boolean), ctinyint (type: tinyint), 
ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -359,7 +445,20 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -378,16 +477,39 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:tinyint], IdentityExpression[2:timestamp], IdentityExpression[3:float], IdentityExpression[4:string], IdentityExpression[5:tinyint], IdentityExpression[6:tinyint], IdentityExpression[7:tinyint], IdentityExpression[8:double], IdentityExpression[9:double], IdentityExpression[10:double], IdentityExpression[11:float], IdentityExpression[12:double], IdentityExpression[10:double], IdentityExpression[14:double], IdentityExpression[15:decimal(7,3)], IdentityExpression[16:double], IdentityExpression[17:double], IdentityExpression[18:float], IdentityExpression[19:double], IdentityExpression[20:tinyint] + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 + Limit 
Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 99192bd..e5d6ed9 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -68,6 +68,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -81,67 +85,33 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean) - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) - sort order: ++++ - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reducer 3 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cfloat, diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 0a676bd..42f153a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -64,6 +64,10 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -77,67 +81,33 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) - outputColumnNames: cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), 
_col6 (type: timestamp) - sort order: +++++++ - Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) - keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - sort order: +++++++ - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reducer 3 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: 
string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cboolean1, diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index d7f8ba4..3a06931 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -54,55 +58,24 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 
(type: timestamp) - sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(38,16)), _col4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cstring1, cdouble, diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index 878dcce..a1cacb1 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -50,6 +50,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,43 +66,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column 
stats: NONE - Filter Operator - predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: bigint), _col0 (type: float) - sort order: ++ - Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cstring1, diff --git ql/src/test/results/clientpositive/spark/vectorization_7.q.out ql/src/test/results/clientpositive/spark/vectorization_7.q.out index 11f0083..e71833f 100644 --- 
ql/src/test/results/clientpositive/spark/vectorization_7.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_7.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -29,7 +29,7 @@ LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -56,6 +56,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -73,34 +77,84 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColNotEqualLongScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprOrExpr[-1:boolean](FilterDoubleScalarLessDoubleColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean]))) + vectorized: true + predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[6:string], LongColAddLongColumn[13:long], LongColModuloLongScalar[14:long](IdentityExpression[1:smallint]), LongColUnaryMinus[15:long], LongColUnaryMinus[16:long], LongColAddLongScalar[18:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColMultiplyLongColumn[19:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), 
LongColModuloLongColumn[17:long](IdentityExpression[1:smallint]), LongColUnaryMinus[20:long], LongColModuloLongColumn[22:long](LongColUnaryMinus[21:long]) + vectorized: true Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:boolean], IdentityExpression[1:bigint], IdentityExpression[2:smallint], IdentityExpression[3:tinyint], IdentityExpression[4:timestamp], IdentityExpression[5:string], IdentityExpression[6:bigint], IdentityExpression[7:int], IdentityExpression[8:smallint], IdentityExpression[9:tinyint], IdentityExpression[10:int], IdentityExpression[11:bigint], IdentityExpression[12:int], IdentityExpression[9:tinyint], IdentityExpression[14:tinyint] + vectorized: true Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -190,7 +244,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -218,7 +272,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -245,6 +299,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -262,34 +320,84 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColNotEqualLongScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprOrExpr[-1:boolean](FilterDoubleScalarLessDoubleColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean]))) + vectorized: true + predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: 
tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[6:string], LongColAddLongColumn[13:long], LongColModuloLongScalar[14:long](IdentityExpression[1:smallint]), LongColUnaryMinus[15:long], LongColUnaryMinus[16:long], LongColAddLongScalar[18:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColMultiplyLongColumn[19:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColModuloLongColumn[17:long](IdentityExpression[1:smallint]), LongColUnaryMinus[20:long], LongColModuloLongColumn[22:long](LongColUnaryMinus[21:long]) + vectorized: true Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[0:boolean], IdentityExpression[1:bigint], IdentityExpression[2:smallint], IdentityExpression[3:tinyint], IdentityExpression[4:timestamp], IdentityExpression[5:string], IdentityExpression[6:bigint], IdentityExpression[7:int], IdentityExpression[8:smallint], IdentityExpression[9:tinyint], IdentityExpression[10:int], IdentityExpression[11:bigint], IdentityExpression[12:int], IdentityExpression[9:tinyint], IdentityExpression[14:tinyint] + vectorized: true Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_8.q.out ql/src/test/results/clientpositive/spark/vectorization_8.q.out index 1d4f32b..0ceec2e 100644 --- ql/src/test/results/clientpositive/spark/vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_8.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -27,7 +27,7 @@ LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,34 +73,84 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cstring2 is not null and ((UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0))) or ((cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0)))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColLessDoubleScalar[-1:boolean] FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColEqualDoubleScalar[-1:boolean])) + vectorized: true + predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 
16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[5:double], IdentityExpression[10:boolean], IdentityExpression[6:string], IdentityExpression[4:float], DoubleColUnaryMinus[12:double], DoubleScalarSubtractDoubleColumn[13:double], DoubleColMultiplyDoubleScalar[14:double], DoubleColAddDoubleColumn[16:double](CastLongToFloatViaLongToDouble[15:double]), DoubleColAddDoubleColumn[18:double](DoubleColUnaryMinus[15:double] CastLongToDouble[17:double]), DoubleColUnaryMinus[15:double], DoubleScalarSubtractDoubleColumn[17:double], DoubleColUnaryMinus[19:double], DoubleColAddDoubleColumn[21:double](DoubleScalarSubtractDoubleColumn[20:double] IdentityExpression[22:double](DoubleColAddDoubleColumn[22:double](CastLongToFloatViaLongToDouble[21:double]))) + vectorized: true + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: double) + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: double), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:double], IdentityExpression[2:boolean], IdentityExpression[3:string], IdentityExpression[4:float], IdentityExpression[5:double], IdentityExpression[6:double], IdentityExpression[7:double], IdentityExpression[8:float], IdentityExpression[9:double], IdentityExpression[5:double], IdentityExpression[11:float], IdentityExpression[12:float], IdentityExpression[13:double] + vectorized: true + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -177,7 +231,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -203,7 +257,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -228,6 +282,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -245,34 +303,84 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cstring2 is not null and ((UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998))) or ((cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0)))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColLessDoubleScalar[-1:boolean] FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColEqualDoubleScalar[-1:boolean])) + vectorized: true + predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + 
UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[5:double], IdentityExpression[10:boolean], IdentityExpression[6:string], IdentityExpression[4:float], DoubleColUnaryMinus[12:double], DoubleScalarSubtractDoubleColumn[13:double], DoubleColMultiplyDoubleScalar[14:double], DoubleColAddDoubleColumn[16:double](CastLongToFloatViaLongToDouble[15:double]), DoubleColAddDoubleColumn[18:double](DoubleColUnaryMinus[15:double] CastLongToDouble[17:double]), DoubleColUnaryMinus[15:double], DoubleScalarSubtractDoubleColumn[17:double], DoubleColUnaryMinus[19:double], DoubleColAddDoubleColumn[21:double](DoubleScalarSubtractDoubleColumn[20:double] IdentityExpression[22:double](DoubleColAddDoubleColumn[22:double](CastLongToFloatViaLongToDouble[21:double]))) + vectorized: true + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: double) + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), 
KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: double), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp], IdentityExpression[1:double], IdentityExpression[2:boolean], IdentityExpression[3:string], IdentityExpression[4:float], IdentityExpression[5:double], IdentityExpression[6:double], IdentityExpression[7:double], IdentityExpression[8:float], IdentityExpression[9:double], IdentityExpression[5:double], IdentityExpression[11:float], IdentityExpression[12:float], IdentityExpression[13:double] + vectorized: true + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index d7f8ba4..3a06931 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -54,55 +58,24 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator 
Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(38,16)), _col4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cstring1, cdouble, diff --git ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out 
ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out index 9a6cb52..2b7b411 100644 --- ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out @@ -12,28 +12,20 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - TableScan - alias: date_decimal_test - Filter Operator - predicate: (cint is not null and cdouble is not null) (type: boolean) - Select Operator - expressions: cdate (type: date), cdecimal (type: decimal(20,10)) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vectorization_div0.q.out ql/src/test/results/clientpositive/spark/vectorization_div0.q.out index a70647e..e8772fe 100644 --- ql/src/test/results/clientpositive/spark/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_div0.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization detail select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization detail select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -134,17 +138,21 @@ NULL PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization detail select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain 
+explain vectorization detail select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,31 +170,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean]) + vectorized: true predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: LongColSubtractLongScalar[12:long], DoubleColDivideDoubleColumn[15:double](CastLongToDouble[14:double](LongColSubtractLongScalar[13:long])), DecimalScalarDivideDecimalColumn[17:decimal(22,21)](CastLongToDecimal[16:decimal(19,0)](LongColSubtractLongScalar[13:long])) + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint], IdentityExpression[1:double], IdentityExpression[2:decimal(22,21)] + vectorized: true Statistics: Num rows: 1365 Data size: 41904 
Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -311,16 +368,20 @@ POSTHOOK: Input: default@alltypesorc 60330397 NULL 0.000000019890470801974 PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization detail select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization detail select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -338,31 +399,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterEqualDoubleScalar[-1:boolean] FilterDoubleColLessDoubleScalar[-1:boolean]) + vectorized: true predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: DoubleColAddDoubleScalar[12:double], DoubleColDivideDoubleColumn[15:double](CastLongToDouble[13:double] DoubleColAddDoubleScalar[14:double]), DoubleColDivideDoubleColumn[16:double](DoubleColAddDoubleScalar[13:double] DoubleColAddDoubleScalar[14:double]), DoubleScalarDivideDoubleColumn[14:double](DoubleColAddDoubleScalar[13:double]), DoubleScalarDivideDoubleColumn[17:double](DoubleColAddDoubleScalar[13:double]) + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double], IdentityExpression[1:double], IdentityExpression[2:double], IdentityExpression[1:double], IdentityExpression[3:double], IdentityExpression[4:double] + vectorized: true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out index 23d7ab3..aaf7fcc 100644 --- ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: 
explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,43 +66,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (cdouble + 2.0) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2720 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2720 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out index 59a3be0..e6e5911 100644 --- ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
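Annotation (illustrative sketch, not part of the patch): the golden files above exercise three spellings of the new statement: EXPLAIN VECTORIZATION, EXPLAIN VECTORIZATION DETAIL, and EXPLAIN VECTORIZATION ONLY DETAIL. A minimal way to reproduce this kind of output, assuming the grammar this change introduces (EXPLAIN VECTORIZATION [ONLY] [SUMMARY|OPERATOR|EXPRESSION|DETAIL]) and the alltypesorc fixture table used throughout these tests:

-- The two settings below are the ones reported as enabledConditionsMet /
-- enableConditionsMet in the plans above.
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;

-- ONLY suppresses the row-mode operator tree; DETAIL adds the per-operator
-- blocks (Filter/Select/Reduce Sink Vectorization) and the predicateExpression
-- and selectExpressions strings seen in these diffs.
EXPLAIN VECTORIZATION ONLY DETAIL
SELECT cdouble / 0.0 FROM alltypesorc LIMIT 100;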
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
+PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
 PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
+POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -15,48 +19,24 @@ STAGE PLANS:
   Stage: Stage-1
#### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-                    Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: cbigint (type: bigint)
-                      outputColumnNames: cbigint
-                      Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: avg(cbigint)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: struct)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: avg(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 WARNING: Comparing a bigint and a double may result in a loss of precision.
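The vectorization_pushdown.q.out hunk above also exercises the negative case: when a stage cannot be vectorized, the summary reports vectorized: false together with a notVectorizedReason (here the reduce-side avg, whose struct-typed partial aggregation buffer is not supported). A minimal sketch of pulling those markers out of plan text; the VectorizationReasonScanner class is hypothetical and only assumes the EXPLAIN VECTORIZATION output is available as a list of lines:

    import java.util.Arrays;
    import java.util.List;

    public class VectorizationReasonScanner {
      // Prints every "notVectorizedReason:" line and each "vectorized: false"
      // marker found in textual EXPLAIN VECTORIZATION output.
      static void reportNotVectorized(List<String> planLines) {
        for (String line : planLines) {
          String t = line.trim();
          if (t.startsWith("notVectorizedReason:") || t.equals("vectorized: false")) {
            System.out.println(t);
          }
        }
      }

      public static void main(String[] args) {
        // Sample lines taken from the hunk above.
        reportNotVectorized(Arrays.asList(
            "                enabled: true",
            "                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported",
            "                vectorized: false"));
      }
    }

A scan like this makes it easy to spot which vertices fell back to row mode across a large batch of golden files.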
PREHOOK: query: SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 2347731..eb37276 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -134,63 +136,20 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) - outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT AVG(cint), (AVG(cint) + -3728), @@ -274,7 +233,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -312,7 +272,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -345,63 +306,20 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) - Statistics: Num rows: 6826 Data size: 209555 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) - outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint - Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT MAX(cint), (MAX(cint) / -3728), @@ -479,7 +397,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -516,7 +435,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- 
ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -548,63 +468,20 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) - outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, 
_col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), @@ -681,7 +558,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -708,7 +586,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -730,63 +609,20 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) - Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cint, cfloat - Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT AVG(ctinyint), @@ -843,7 +679,8 @@ PREHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -884,7 +721,8 @@ POSTHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -920,58 +758,20 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: 
smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(20,18)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(20,18)) - sort order: +++++++++++++++++++++++ - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(20,18)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 50 Processor Tree: - ListSink PREHOOK: query: SELECT cint, cdouble, @@ -1104,7 +904,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, 
GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -1144,7 +945,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -1179,58 +981,20 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) - sort order: +++++++++++++++++++++++++ - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory 
Usage: 0.1 - Execution mode: vectorized - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 25 - Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 25 Processor Tree: - ListSink PREHOOK: query: SELECT cint, cbigint, @@ -1314,7 +1078,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1353,7 +1118,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1387,59 +1153,20 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) 
<> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(19,18)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(19,18)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) - sort order: +++++++++++++++++++++++ - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: boolean) - Execution mode: vectorized - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(19,18)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 75 - Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 75 Data size: 2250 Basic 
stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 75 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cint, @@ -1594,7 +1321,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1626,7 +1354,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1653,59 +1382,20 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) - sort order: +++++++++++++++ - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: 
_col0 (type: timestamp) - Execution mode: vectorized - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 45 - Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 45 Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cstring2, @@ -1815,7 +1505,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1840,7 +1531,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1860,83 +1552,20 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) - outputColumnNames: csmallint, cbigint, ctinyint - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() - keys: csmallint (type: smallint) - mode: hash - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) - keys: KEY._col0 (type: smallint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(19,18)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(19,18)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) - sort order: +++++++++++ - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(19,18)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 20 Processor Tree: - ListSink PREHOOK: query: SELECT csmallint, (csmallint % -75) as c1, @@ -2008,7 +1637,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), 
(-(VAR_SAMP(cdouble))), @@ -2040,7 +1670,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2067,80 +1698,20 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), cfloat (type: float) - outputColumnNames: cdouble, cfloat - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) - keys: cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double) - outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - 
Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cdouble, @@ -2206,7 +1777,8 @@ PREHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2266,7 +1838,8 @@ POSTHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2321,83 +1894,20 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and 
(cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) - keys: ctimestamp1 (type: timestamp), cstring1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: 
double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(20,18)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(20,18)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) - sort order: +++++++++++++++++++++++++++++++++++++++ - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(20,18)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 50 Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cstring1, @@ -2568,7 +2078,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2613,7 +2124,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2653,80 +2165,20 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) - outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) - keys: cboolean1 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10 - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: boolean), _col1 (type: float), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (- _col1) (type: float), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double) - outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: 
float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT cboolean1, MAX(cfloat), @@ -2824,12 +2276,16 @@ create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2847,27 +2303,72 @@ STAGE PLANS: alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: 
vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2889,12 +2390,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2914,27 +2419,73 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator 
Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3030,12 +2581,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3053,27 +2608,72 @@ STAGE PLANS: alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark 
IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3095,12 +2695,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3120,27 +2724,73 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By 
Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3162,12 +2812,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3187,27 +2841,73 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3229,12 +2929,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3254,27 +2958,73 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + 
vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3296,12 +3046,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3321,27 +3075,73 @@ STAGE PLANS: Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[6:string]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3363,12 +3163,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: 
default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3388,27 +3192,73 @@ STAGE PLANS: Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[10:boolean]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(IdentityExpression[0:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out index a12ac05..d01be5e 100644 --- ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out +++ 
ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -115,8 +119,14 @@ STAGE PLANS: alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -124,19 +134,43 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: 
false + vectorized: true Stage: Stage-0 Fetch Operator @@ -158,12 +192,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -179,8 +217,14 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -188,19 +232,43 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -227,7 +295,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized 
CommonRCFileforma -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box @@ -235,9 +303,13 @@ POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileform -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -253,8 +325,14 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -262,19 +340,43 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c06ea94..ea1cb9c 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,20 +53,45 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean]) + vectorized: true predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], VectorUDFAdaptor[14:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]), VectorUDFAdaptor[15:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]) + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -112,7 +141,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select csmallint, case @@ -130,7 +159,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY 
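These q.out updates exercise the new EXPLAIN VECTORIZATION grammar in several shapes: bare EXPLAIN VECTORIZATION, explain vectorization detail, and EXPLAIN VECTORIZATION ONLY DETAIL. Each variant prepends a PLAN VECTORIZATION header reporting whether vectorization is enabled and which conditions were met; the detail mode additionally annotates each operator (Filter Vectorization, Select Vectorization, Group By Vectorization, and so on) with its vector class, native status, and the chosen vector expressions. A minimal HiveQL sketch of the variants against the alltypesorc table used throughout these tests — the bare form and the ONLY/DETAIL modifiers appear verbatim in the hunks above, while the existence of further mode names beyond DETAIL is an assumption not confirmed by this diff:

SET hive.vectorized.execution.enabled = true;

-- Bare form: the usual plan plus the PLAN VECTORIZATION header and
-- per-vertex Map/Reduce Vectorization summaries.
EXPLAIN VECTORIZATION
SELECT COUNT(csmallint) FROM alltypesorc;

-- ONLY DETAIL: suppress most non-vectorization plan detail and keep the
-- per-operator and per-expression annotations (e.g. the VectorUDFAdaptor
-- entries that show up for CASE WHEN below).
EXPLAIN VECTORIZATION ONLY DETAIL
SELECT csmallint,
       CASE WHEN csmallint = 418   THEN 'a'
            WHEN csmallint = 12205 THEN 'b'
            ELSE 'c' END
FROM alltypesorc
WHERE csmallint = 418 OR csmallint = 12205 OR csmallint = 10583;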
-POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -148,6 +177,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -163,20 +196,45 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean]) + vectorized: true predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], VectorUDFAdaptor[14:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]), VectorUDFAdaptor[15:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]) + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 7c4467a..d1786ab 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -21,22 +25,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 3 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -47,63 +44,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 diff --git ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out index 0a81f62..c73f5e1 100644 --- ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdouble ,Round(cdouble, 2) @@ -54,7 +54,7 @@ and sin(cfloat) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdouble ,Round(cdouble, 2) @@ -106,22 +106,16 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Select Operator - expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 - ListSink PREHOOK: query: select cdouble diff --git 
ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index a3d5947..9b233d0 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -17,41 +21,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 3 - Map Operator Tree: - TableScan - alias: v1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 - Map Operator Tree: - TableScan - alias: v3 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: csmallint (type: smallint) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -62,74 +52,29 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: v2 
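(For orientation while reading these hunks: the golden files in this section replace full Map/Reduce operator trees with vectorization summaries. Below is a minimal, illustrative session that would produce such a summary; the query itself is invented for illustration, but the table alltypesorc, the columns csmallint and cdouble, and both settings appear verbatim in the surrounding tests — the SET statements simply name the two conditions echoed in the enabledConditionsMet / enableConditionsMet lines, not anything this patch changes:

    -- Assumed defaults made explicit; both conditions are reported in the summary output.
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;

    -- Summary-level explain, as used by the updated tests in this section.
    EXPLAIN VECTORIZATION
    SELECT SUM(cdouble)
    FROM alltypesorc
    WHERE csmallint IS NOT NULL;

Vertices that vectorize keep "Execution mode: vectorized" and gain a Map Vectorization block; vertices that do not instead report vectorized: false together with a notVectorizedReason.)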
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: smallint), _col3 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 2ab2541..240d840 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchem POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] PREHOOK: query: --1. 
test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -132,7 +132,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -142,6 +142,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -155,21 +159,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -221,107 +219,21 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator 
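(The vectorized_ptf.q.out hunks in this stretch switch the baselines from explain extended to explain vectorization extended; the reducers that run noop and windowingtablefunction remain row-mode, so their operator trees give way to Reduce Vectorization blocks carrying a notVectorizedReason. A condensed sketch of the query shape being re-baselined, drawn directly from the test itself — part_orc and the noop PTF are set up earlier in the same test:

    EXPLAIN VECTORIZATION EXTENDED
    SELECT p_mfgr, p_name, p_size,
           rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r
    FROM noop(ON part_orc
              PARTITION BY p_mfgr
              ORDER BY p_name);)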
- Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -373,7 +285,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -382,13 +294,17 @@ sort by j.p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 2. 
testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -403,25 +319,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -472,24 +378,15 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [p1] Map 5 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -541,114 +438,28 @@ STAGE PLANS: /part_orc [p2] Reducer 2 Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: j - output shape: _col1: string, _col2: string, _col5: int - type: SUBQUERY - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: lag_window_0 - arguments: _col5, 1, _col5 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN 
[tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz @@ -696,7 +507,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -704,12 +515,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -722,21 +537,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -788,57 +597,14 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - 
serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size from noop(on part_orc @@ -882,7 +648,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -894,7 +660,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -904,6 +670,10 @@ from noop(on part_orc order by p_name ) abc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -917,21 +687,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -983,107 +747,21 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: abc - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition 
columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -1135,7 +813,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. 
testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1147,7 +825,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1157,6 +835,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1170,21 +852,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1236,108 +912,21 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, 
_col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: lag_window_2 - arguments: _col5, 1, _col5 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -1389,7 +978,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1402,7 +991,7 @@ group by p_mfgr, p_name, p_size PREHOOK: type: QUERY POSTHOOK: query: -- 6. 
testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1413,6 +1002,10 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1426,21 +1019,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1492,117 +1079,21 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col2, _col1, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa - sort order: +++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: 
NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: lag_window_2 - arguments: _col2, 1, _col2 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -1655,7 +1146,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -1664,13 +1155,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 7. 
testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1684,21 +1179,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1749,24 +1238,15 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Map 4 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1818,77 +1298,21 @@ STAGE PLANS: /part_orc [p1] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string - type: TABLE - Partition table definition - input alias: abc - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col0: int, _col1: string, _col2: string, _col3: string, 
_col4: string, _col5: int, _col6: string, _col7: double, _col8: string
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: _col0 is not null (type: boolean)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
-                      auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
         Reducer 3 
             Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 p_partkey (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  GlobalTableId: 0
-#### A masked pattern was here ####
-                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      properties:
-                        columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
-                        columns.types int:string:string:string:string:int:string:double:string
-                        escape.delim \
-                        hive.serialization.extend.additional.nesting.levels true
-                        serialization.escape.crlf true
-                        serialization.format 1
-                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  TotalFiles: 1
-                  GatherStats: false
-                  MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tez doesn't use tagging
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select abc.* 
 from noop(on part_orc 
@@ -1936,7 +1360,7 @@ POSTHOOK: Input: default@part_orc
 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
 PREHOOK: query: -- 8. testJoinRight
-explain extended
+explain vectorization extended
 select abc.* 
 from part_orc p1 join noop(on part_orc 
 partition by p_mfgr 
@@ -1945,13 +1369,17 @@ order by p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 8. testJoinRight
-explain extended
+explain vectorization extended
 select abc.* 
 from part_orc p1 join noop(on part_orc 
 partition by p_mfgr 
 order by p_name 
 ) abc on abc.p_partkey = p1.p_partkey
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1965,24 +1393,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: p1
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: p_partkey is not null (type: boolean)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: p_partkey (type: int)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: p_partkey (type: int)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2033,21 +1452,15 @@ STAGE PLANS:
             Truncated Path -> Alias:
               /part_orc [p1]
         Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2099,81 +1512,21 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 p_partkey (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
-                Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                  Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
-                          columns.types int:string:string:string:string:int:string:double:string
-                          escape.delim \
-                          hive.serialization.extend.additional.nesting.levels true
-                          serialization.escape.crlf true
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tez doesn't use tagging
+                vectorized: false
         Reducer 4 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
-                        type: TABLE
-                      Partition table definition
-                        input alias: abc
-                        name: noop
-                        order by: _col1 ASC NULLS FIRST
-                        output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: _col0 is not null (type: boolean)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
-                      auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select abc.* 
 from part_orc p1 join noop(on part_orc 
@@ -2221,7 +1574,7 @@ POSTHOOK: Input: default@part_orc
 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
 PREHOOK: query: -- 9. testNoopWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r 
 from noopwithmap(on part_orc 
@@ -2230,13 +1583,17 @@ order by p_name, p_size desc)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 9. testNoopWithMap
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r 
 from noopwithmap(on part_orc 
 partition by p_mfgr 
 order by p_name, p_size desc) 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2250,35 +1607,12 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  PTF Operator
-                    Function definitions:
-                        Input definition
-                          input alias: part_orc
-                          output shape: p_name: string, p_mfgr: string, p_size: int
-                          type: TABLE
-                        Partition table definition
-                          input alias: ptf_1
-                          name: noopwithmap
-                          order by: p_name ASC NULLS FIRST, p_size DESC NULLS LAST
-                          output shape: p_name: string, p_mfgr: string, p_size: int
-                          partition by: p_mfgr
-                          raw input shape:
-                          transforms raw input: true
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Map-side function: true
-                    Reduce Output Operator
-                      key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
-                      null sort order: aaz
-                      sort order: ++-
-                      Map-reduce partition columns: p_mfgr (type: string)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      auto parallelism: false
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Unknown
+                vectorized: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2330,94 +1664,21 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
-                outputColumnNames: _col1, _col2, _col5
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col1: string, _col2: string, _col5: int
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noopwithmap
-                        order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST
-                        output shape: _col1: string, _col2: string, _col5: int
-                        partition by: _col2
-                        raw input shape:
-                        transforms raw input: true
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
-                    null sort order: aaz
-                    sort order: ++-
-                    Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
         Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
-                outputColumnNames: _col1, _col2, _col5
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: rank_window_0
-                              arguments: _col1, _col5
-                              name: rank
-                              window function: GenericUDAFRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types string:string:int:int
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name, p_size desc) as r 
@@ -2463,7 +1724,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4
 Manufacturer#5 almond azure blanched chiffon midnight 23 5
 PREHOOK: query: -- 10. testNoopWithMapWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
 dense_rank() over (partition by p_mfgr order by p_name) as dr, 
@@ -2474,7 +1735,7 @@ from noopwithmap(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 10. testNoopWithMapWithWindowing
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
 dense_rank() over (partition by p_mfgr order by p_name) as dr, 
@@ -2483,6 +1744,10 @@ from noopwithmap(on part_orc
 partition by p_mfgr 
 order by p_name) 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2496,36 +1761,12 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  PTF Operator
-                    Function definitions:
-                        Input definition
-                          input alias: part_orc
-                          output shape: p_name: string, p_mfgr: string, p_size: int, p_retailprice: double
-                          type: TABLE
-                        Partition table definition
-                          input alias: ptf_1
-                          name: noopwithmap
-                          order by: p_name ASC NULLS FIRST
-                          output shape: p_name: string, p_mfgr: string, p_size: int, p_retailprice: double
-                          partition by: p_mfgr
-                          raw input shape:
-                          transforms raw input: true
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Map-side function: true
-                    Reduce Output Operator
-                      key expressions: p_mfgr (type: string), p_name (type: string)
-                      null sort order: aa
-                      sort order: ++
-                      Map-reduce partition columns: p_mfgr (type: string)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: p_size (type: int), p_retailprice (type: double)
-                      auto parallelism: false
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Unknown
+                vectorized: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2577,108 +1818,21 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noopwithmap
-                        order by: _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                        transforms raw input: true
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col2 (type: string), _col1 (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: _col5 (type: int), _col7 (type: double)
-                    auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: rank_window_0
-                              arguments: _col1
-                              name: rank
-                              window function: GenericUDAFRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: dense_rank_window_1
-                              arguments: _col1
-                              name: dense_rank
-                              window function: GenericUDAFDenseRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: sum_window_2
-                              arguments: _col7
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(MAX)~
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3,_col4,_col5
-                            columns.types string:string:int:int:int:double
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
@@ -2728,7 +1882,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
 dense_rank() over (partition by p_mfgr order by p_name) as dr, 
@@ -2739,7 +1893,7 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
 dense_rank() over (partition by p_mfgr order by p_name) as dr, 
@@ -2748,6 +1902,10 @@ from noop(on part_orc
 partition by p_mfgr 
 order by p_name) 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -2761,21 +1919,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
      Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_size (type: int), p_retailprice (type: double)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2827,107 +1979,21 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noop
-                        order by: _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col2 (type: string), _col1 (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: _col5 (type: int), _col7 (type: double)
-                    auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: rank_window_0
-                              arguments: _col1
-                              name: rank
-                              window function: GenericUDAFRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: dense_rank_window_1
-                              arguments: _col1
-                              name: dense_rank
-                              window function: GenericUDAFDenseRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: sum_window_2
-                              arguments: _col7
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(MAX)~
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3,_col4,_col5
-                            columns.types string:string:int:int:int:double
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
@@ -2977,7 +2043,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
 PREHOOK: query: -- 12. testFunctionChain
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
 dense_rank() over (partition by p_mfgr order by p_name) as dr, 
@@ -2989,7 +2055,7 @@ order by p_mfgr, p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 12. testFunctionChain
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
 dense_rank() over (partition by p_mfgr order by p_name) as dr, 
@@ -2999,6 +2065,10 @@ partition by p_mfgr
 order by p_mfgr, p_name 
 ))) 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3013,21 +2083,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_size (type: int), p_retailprice (type: double)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3079,168 +2143,28 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noop
-                        order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  PTF Operator
-                    Function definitions:
-                        Input definition
-                          input alias: ptf_0
-                          output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                          type: PTFCOMPONENT
-                        Partition table definition
-                          input alias: ptf_1
-                          name: noopwithmap
-                          order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
-                          output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                          partition by: _col2
-                          raw input shape:
-                          transforms raw input: true
-                        Partition table definition
-                          input alias: ptf_2
-                          name: noop
-                          order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
-                          output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                          partition by: _col2
-                          raw input shape:
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Map-side function: true
-                    Reduce Output Operator
-                      key expressions: _col2 (type: string), _col1 (type: string)
-                      null sort order: aa
-                      sort order: ++
-                      Map-reduce partition columns: _col2 (type: string)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col5 (type: int), _col7 (type: double)
-                      auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: PTFCOMPONENT
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noopwithmap
-                        order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                        transforms raw input: true
-                      Partition table definition
-                        input alias: ptf_2
-                        name: noop
-                        order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col2 (type: string), _col1 (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: _col5 (type: int), _col7 (type: double)
-                    auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 4 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: rank_window_0
-                              arguments: _col1
-                              name: rank
-                              window function: GenericUDAFRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: dense_rank_window_1
-                              arguments: _col1
-                              name: dense_rank
-                              window function: GenericUDAFDenseRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: sum_window_2
-                              arguments: _col7
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(MAX)~
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3,_col4,_col5
-                            columns.types string:string:int:int:int:double
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select p_mfgr, p_name, p_size, 
 rank() over (partition by p_mfgr order by p_name) as r, 
@@ -3292,7 +2216,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, 
 sub1.cd, sub1.s1 
 from (select p_mfgr, p_name, 
@@ -3307,7 +2231,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, 
 sub1.cd, sub1.s1 
 from (select p_mfgr, p_name, 
@@ -3320,6 +2244,10 @@ order by p_name)
 window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) 
 ) sub1 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3333,21 +2261,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_size (type: int), p_retailprice (type: double)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3399,99 +2321,21 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noop
-                        order by: _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col2 (type: string), _col1 (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: _col5 (type: int), _col7 (type: double)
-                    auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: count_window_0
-                              arguments: _col5
-                              name: count
-                              window function: GenericUDAFCountEvaluator
-                              window frame: PRECEDING(MAX)~
-                            window function definition
-                              alias: sum_window_1
-                              arguments: _col7
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(2)~FOLLOWING(2)
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types string:string:bigint:double
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
  Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select p_mfgr, p_name, 
 sub1.cd, sub1.s1 
@@ -3549,7 +2393,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001
 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995
 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount
-explain extended
+explain vectorization extended
 select abc.p_mfgr, abc.p_name, 
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, 
 dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, 
@@ -3563,7 +2407,7 @@ order by p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount
-explain extended
+explain vectorization extended
 select abc.p_mfgr, abc.p_name, 
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, 
 dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, 
@@ -3575,6 +2419,10 @@ partition by p_mfgr
 order by p_name 
 ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3589,21 +2437,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3654,24 +2496,15 @@ STAGE PLANS:
             Truncated Path -> Alias:
               /part_orc [part_orc]
         Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: p1
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: p_partkey is not null (type: boolean)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: p_partkey (type: int)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: p_partkey (type: int)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -3723,144 +2556,28 @@ STAGE PLANS:
             /part_orc [p1]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col0, _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
-                        type: TABLE
-                      Partition table definition
-                        input alias: abc
-                        name: noop
-                        order by: _col1 ASC NULLS FIRST
-                        output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: _col0 is not null (type: boolean)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      null sort order: a
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
-                      auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 p_partkey (type: int)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col2 (type: string), _col1 (type: string)
-                  null sort order: aa
-                  sort order: ++
-                  Map-reduce partition columns: _col2 (type: string)
-                  Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                  tag: -1
-                  value expressions: _col5 (type: int), _col7 (type: double)
-                  auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tez doesn't use tagging
+                vectorized: false
        Reducer 4 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: rank_window_0
-                              arguments: _col1
-                              name: rank
-                              window function: GenericUDAFRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: dense_rank_window_1
-                              arguments: _col1
-                              name: dense_rank
-                              window function: GenericUDAFDenseRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: count_window_2
-                              arguments: _col1
-                              name: count
-                              window function: GenericUDAFCountEvaluator
-                              window frame: PRECEDING(MAX)~
-                            window function definition
-                              alias: sum_window_3
-                              arguments: _col7
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(MAX)~
-                            window function definition
-                              alias: lag_window_4
-                              arguments: _col5, 1, _col5
-                              name: lag
-                              window function: GenericUDAFLagEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                  Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
-                            columns.types string:string:int:int:bigint:double:double:int:int
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
  Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select abc.p_mfgr, abc.p_name, 
 rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, 
@@ -3918,7 +2635,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46
 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23
 PREHOOK: query: -- 15. testDistinctInSelectWithPTF
-explain extended
+explain vectorization extended
 select DISTINCT p_mfgr, p_name, p_size 
 from noop(on part_orc 
 partition by p_mfgr 
@@ -3926,12 +2643,16 @@ order by p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 15. testDistinctInSelectWithPTF
-explain extended
+explain vectorization extended
 select DISTINCT p_mfgr, p_name, p_size 
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name) 
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -3945,21 +2666,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_size (type: int)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -4011,79 +2726,24 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
-                outputColumnNames: _col1, _col2, _col5
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: part_orc
-                        output shape: _col1: string, _col2: string, _col5: int
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noop
-                        order by: _col1 ASC NULLS FIRST
-                        output shape: _col1: string, _col2: string, _col5: int
-                        partition by: _col2
-                        raw input shape:
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
-                    outputColumnNames: _col2, _col1, _col5
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col2 (type: string), _col1 (type: string), _col5 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
-                        null sort order: aaa
-                        sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
-                        Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                        tag: -1
-                        auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Execution mode: vectorized
             Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  GlobalTableId: 0
-#### A masked pattern was here ####
-                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      properties:
-                        columns _col0,_col1,_col2
-                        columns.types string:string:int
-                        escape.delim \
-                        hive.serialization.extend.additional.nesting.levels true
-                        serialization.escape.crlf true
-                        serialization.format 1
-                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  TotalFiles: 1
-                  GatherStats: false
-                  MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
  Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size 
 from noop(on part_orc 
@@ -4144,7 +2804,7 @@ POSTHOOK: type: CREATEVIEW
 POSTHOOK: Input: default@part_orc
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@mfgr_price_view
-PREHOOK: query: explain extended
+PREHOOK: query: explain vectorization extended
 select p_mfgr, p_brand, s, 
 sum(s) over w1 as s1 
 from noop(on mfgr_price_view 
@@ -4152,7 +2812,7 @@ partition by p_mfgr
 order by p_mfgr) 
 window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: explain vectorization extended
 select p_mfgr, p_brand, s, 
 sum(s) over w1 as s1 
 from noop(on mfgr_price_view 
@@ -4160,6 +2820,10 @@ partition by p_mfgr
 order by p_mfgr) 
 window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -4173,31 +2837,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Select Operator
-                    expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double)
-                    outputColumnNames: p_mfgr, p_brand, p_retailprice
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: sum(p_retailprice)
-                      keys: p_mfgr (type: string), p_brand (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string), _col1 (type: string)
-                        null sort order: aa
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                        tag: -1
-                        value expressions: _col2 (type: double)
-                        auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -4249,95 +2897,21 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 2 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: mfgr_price_view
-                        output shape: _col0: string, _col1: string, _col2: double
-                        type: TABLE
-                      Partition table definition
-                        input alias: ptf_1
-                        name: noop
-                        order by: _col0 ASC NULLS FIRST
-                        output shape: _col0: string, _col1: string, _col2: double
-                        partition by: _col0
-                        raw input shape:
-                  Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: _col2 (type: double)
-                    auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col0: string, _col1: string, _col2: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col0
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: sum_window_0
-                              arguments: _col2
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(2)~
-                  Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types string:string:double:double
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
  Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select p_mfgr, p_brand, s, 
 sum(s) over w1 as s1 
@@ -4421,7 +2995,7 @@ fv1 INT)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@part_5
-PREHOOK: query: explain extended
+PREHOOK: query: explain vectorization extended
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name) 
@@ -4437,7 +3011,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
 first_value(p_size, true) over w1 as fv1 
 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: explain vectorization extended
 from noop(on part_orc 
 partition by p_mfgr 
 order by p_name) 
@@ -4453,6 +3027,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
 first_value(p_size, true) over w1 as fv1 
 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
@@ -4472,21 +3050,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Vertices:
         Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: part_orc
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_name (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: p_size (type: int), p_retailprice (type: double)
-                    auto parallelism: false
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
            Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -4538,259 +3110,39 @@ STAGE PLANS:
             /part_orc [part_orc]
         Reducer 3 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
-                outputColumnNames: _col1, _col2, _col5, _col7
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int, _col7: double
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col1 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: rank_window_0
-                              arguments: _col1
-                              name: rank
-                              window function: GenericUDAFRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: dense_rank_window_1
-                              arguments: _col1
-                              name: dense_rank
-                              window function: GenericUDAFDenseRankEvaluator
-                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
-                              isPivotResult: true
-                            window function definition
-                              alias: sum_window_2
-                              arguments: _col7
-                              name: sum
-                              window function: GenericUDAFSumDouble
-                              window frame: PRECEDING(MAX)~
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 1
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
-                            bucket_count -1
-                            columns p_mfgr,p_name,p_size,r,dr,s
-                            columns.comments 
-                            columns.types string:string:int:int:int:double
-#### A masked pattern was here ####
-                            name default.part_4
-                            numFiles 0
-                            numRows 0
-                            rawDataSize 0
-                            serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s}
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            totalSize 0
-#### A masked pattern was here ####
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.part_4
-                      TotalFiles: 1
-                      GatherStats: true
-                      MultiFileSpray: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 4 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
-                outputColumnNames: _col1, _col2, _col5
-                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                PTF Operator
-                  Function definitions:
-                      Input definition
-                        input alias: ptf_0
-                        output shape: _col1: string, _col2: string, _col5: int
-                        type: WINDOWING
-                      Windowing table definition
-                        input alias: ptf_1
-                        name: windowingtablefunction
-                        order by: _col5 ASC NULLS FIRST
-                        partition by: _col2
-                        raw input shape:
-                        window functions:
-                            window function definition
-                              alias: sum_window_0
-                              arguments: _col5
-                              name: sum
-                              window function: GenericUDAFSumLong
-                              window frame: PRECEDING(5)~
-                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
-                    outputColumnNames: _col1, _col2, _col5, sum_window_0
-                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col2 (type: string), _col1 (type: string)
-                      null sort order: aa
-                      sort order: ++
-                      Map-reduce partition columns: _col2 (type: string)
-                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: sum_window_0 (type: bigint), _col5 (type: int)
-                      auto parallelism: false
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Unknown
+                vectorized: false
        Reducer 5 
             Needs Tagging: false
-            Reduce Operator Tree:
-              Select 
Operator - expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col0, _col2, _col3, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: bigint, _col2: string, _col3: string, _col6: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST - partition by: _col3 - raw input shape: - window functions: - window function definition - alias: rank_window_1 - arguments: _col3, _col2 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_2 - arguments: _col3, _col2 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: cume_dist_window_3 - arguments: _col3, _col2 - name: cume_dist - window function: GenericUDAFCumeDistEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: first_value_window_4 - arguments: _col6, true - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(2)~FOLLOWING(2) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 6 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, 
_col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 7 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Move Operator @@ -4974,7 +3326,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -4993,7 +3345,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 18. 
testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5010,6 +3362,10 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5024,21 +3380,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_name (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5090,175 +3440,28 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto 
parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col2, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col2, _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col2, _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, @@ -5324,7 +3527,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5343,7 +3546,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5360,6 +3563,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5375,21 +3582,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_name (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5441,174 +3642,35 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - 
output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + 
enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 5 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, @@ -5674,7 +3736,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5691,7 +3753,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 20. 
testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5706,6 +3768,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5720,21 +3786,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5786,151 +3846,28 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: 
PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, @@ -5992,7 +3929,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6011,7 +3948,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6028,6 +3965,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6043,21 +3984,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6109,191 +4044,35 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic 
stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: 
Unknown + vectorized: false Reducer 5 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col2, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col2, _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col2, _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, @@ -6359,7 +4138,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6377,7 +4156,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6393,6 +4172,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6407,21 +4190,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6473,168 +4250,28 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + 
notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col2, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col2, _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col2, _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, @@ -6698,7 +4335,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6714,7 +4351,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6728,6 +4365,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -6742,21 +4383,15 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6808,161 +4443,28 @@ STAGE PLANS: /part_orc [part_orc] Reducer 2 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input 
alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 3 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Reducer 4 Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - 
partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 535da25..cffff28 100644 --- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -24,96 +28,49 @@ STAGE PLANS: #### A masked pattern was here #### 
Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tez doesn't use tagging + vectorized: false Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 
92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false Reducer 4 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint) AS CNT, MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 diff --git ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out index bfac939..a000fda 100644 --- ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -24,7 +24,7 @@ and cstring1 like '%' PREHOOK: type: QUERY POSTHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. 
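The hunk that continues below makes the same substitution seen throughout this patch: plain explain becomes explain vectorization. Three forms appear across these golden files — explain vectorization (summary only), explain vectorization extended, and explain vectorization detail (per-operator vectorization blocks). A minimal usage sketch, reusing queries from these files and assuming the modifier grammar shown in this patch is the complete one:

    -- Summary form: adds the PLAN VECTORIZATION header plus per-vertex
    -- Map Vectorization / Reduce Vectorization sections.
    EXPLAIN VECTORIZATION
    SELECT substr(cstring1, 1, 2), lower(cstring1), upper(cstring1)
    FROM alltypesorc
    WHERE cbigint % 237 = 0;

    -- Detail form: additionally prints Filter/Select/Reduce Sink
    -- Vectorization blocks naming the vector expression classes used.
    EXPLAIN VECTORIZATION DETAIL
    SELECT cdouble / 0.0 FROM alltypesorc LIMIT 100;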
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -46,22 +46,16 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Select Operator - expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - ListSink PREHOOK: query: select substr(cstring1, 1, 2) diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 5eb896f..cf0c4a6 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -111,40 +115,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 
(type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(ctimestamp1) AS c1, @@ -216,7 +207,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -229,7 +220,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -242,6 +233,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,40 +249,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 
(type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(stimestamp1) AS c1, @@ -359,7 +341,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -372,7 +354,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -385,6 +367,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -397,40 +383,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 
84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- Should all be true or NULL SELECT @@ -505,7 +478,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -519,7 +492,7 @@ FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY POSTHOOK: query: -- Wrong format. Should all be NULL. 
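The "wrong format" case restated below relies on Hive's lenient string-to-timestamp handling: when stimestamp1 does not parse as a timestamp, to_unix_timestamp and the calendar functions return NULL instead of failing, which is why the expected output for alltypesorc_wrong is entirely NULL. A small sketch of the behavior being verified, assuming alltypesorc_wrong holds deliberately malformed timestamp strings as its setup suggests:

    -- Every row yields NULL because the strings do not parse as timestamps.
    SELECT to_unix_timestamp(stimestamp1), year(stimestamp1), month(stimestamp1)
    FROM alltypesorc_wrong;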
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -532,6 +505,10 @@ EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -544,40 +521,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_wrong - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(stimestamp1) AS c1, @@ -612,20 +576,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string 
POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -638,45 +606,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT min(ctimestamp1), @@ -698,15 +648,19 @@ POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... 
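Per the HIVE-8211 note above, sum over a timestamp produces a double, and the regenerated plan below shows the map-side group-by falling back to row mode with the reason Vector aggregate not implemented: "sum" for type: "TIMESTAMP" (while the reduce side now vectorizes). A hedged workaround sketch — assuming an explicit cast lets the vectorized double-sum aggregate apply; an illustration, not part of the patch:

    -- Casting up front may allow the vectorized SUM(double) aggregate to be used.
    SELECT round(sum(CAST(ctimestamp1 AS double)), 3)
    FROM alltypesorc_string;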
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -719,48 +673,24 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(ctimestamp1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) + vectorized: false Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: round(_col0, 3) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT round(sum(ctimestamp1), 3) @@ -775,7 +705,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -786,7 +716,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -797,6 +727,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY 
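In the plan that follows, Reducer 2 stays in row mode with the reason Data type struct of Column[VALUE._col0] not supported: avg ships its partial aggregation state as a struct (the value expressions above are all type: struct), and the reduce-side vectorizer has no struct support, so only the vectorized map side survives. A hedged rewrite sketch using the earlier shuffle-join query — assuming sum and count, whose partials are plain scalars, vectorize where the struct does not; illustration only:

    -- avg recomputed from aggregates whose partial results are scalar.
    SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint), MIN(t1.cint),
           SUM(t1.cint + t2.cint) / COUNT(t1.cint + t2.cint)
    FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint
    ORDER BY CNT;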
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -809,48 +743,24 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT round(avg(ctimestamp1), 0), diff --git ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out index 
8ce85f6..e818f82 100644 --- ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out @@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY Plan optimized by CBO. @@ -107,33 +107,9 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) Stage-0 - Fetch Operator - limit:-1 + limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_21] (rows=2 width=431) - Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_23] - PartitionCols:_col2 - Select Operator [SEL_22] (rows=2 width=134) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=2 width=236) - default@char_tbl1,c1,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] - Dynamic Partitioning Event Operator [EVENT_26] (rows=1 width=134) - Group By Operator [GBY_25] (rows=1 width=134) - Output:["_col0"],keys:_col0 - Select Operator [SEL_24] (rows=2 width=134) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_22] - <-Map 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_28] - PartitionCols:_col2 - Select Operator [SEL_27] (rows=2 width=89) - Output:["_col0","_col1","_col2"] - TableScan [TS_3] (rows=2 width=190) - default@char_tbl2,c2,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out index 4535e66..7950a84 100644 --- ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out @@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@non_string_part ctinyint=__HIVE_DEFAULT_PARTITION__ -PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -44,47 +48,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: 
non_string_part - Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL - Filter Operator - predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: cint (type: int), ctinyint (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY @@ -106,10 +90,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ 799471 NULL 1248059 NULL 1286921 NULL -PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,46 +111,27 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: non_string_part - Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data 
size: 121205 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/vectorization_div0.q.out ql/src/test/results/clientpositive/tez/vectorization_div0.q.out index 12b90a4..1422f21 100644 --- ql/src/test/results/clientpositive/tez/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_div0.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization detail select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization detail select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -134,17 +138,21 @@ NULL PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization detail select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization detail select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY 
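The detail-level plans that follow annotate every operator with its vector class and expression tree. The notation is ClassName[outputColumn:type](childExpressions), read inside-out, where the bracketed number is the batch column (including scratch columns) the kernel writes:

    -- SQL expression              reported vector expression
    (cbigint - 988888)          -> LongColSubtractLongScalar[12:long]
    cdouble / (cbigint-988888)  -> DoubleColDivideDoubleColumn[15:double](
                                     CastLongToDouble[14:double](
                                       LongColSubtractLongScalar[13:long]))

The nativeConditionsMet / nativeConditionsNotMet lists state why an operator is or is not fully native; in the plans below, Reduce Sink misses "No TopN" and "Uniform Hash", so it runs vectorized but non-native.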
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -163,31 +171,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean]) + vectorized: true predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: LongColSubtractLongScalar[12:long], DoubleColDivideDoubleColumn[15:double](CastLongToDouble[14:double](LongColSubtractLongScalar[13:long])), DecimalScalarDivideDecimalColumn[17:decimal(22,21)](CastLongToDecimal[16:decimal(19,0)](LongColSubtractLongScalar[13:long])) + vectorized: true Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint], IdentityExpression[1:double], IdentityExpression[2:decimal(22,21)] + vectorized: true Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num 
rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -312,16 +369,20 @@ POSTHOOK: Input: default@alltypesorc 60330397 NULL 0.000000019890470801974 PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization detail select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization detail select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -340,31 +401,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterEqualDoubleScalar[-1:boolean] FilterDoubleColLessDoubleScalar[-1:boolean]) + vectorized: true predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: DoubleColAddDoubleScalar[12:double], DoubleColDivideDoubleColumn[15:double](CastLongToDouble[13:double] DoubleColAddDoubleScalar[14:double]), DoubleColDivideDoubleColumn[16:double](DoubleColAddDoubleScalar[13:double] DoubleColAddDoubleScalar[14:double]), DoubleScalarDivideDoubleColumn[14:double](DoubleColAddDoubleScalar[13:double]), DoubleScalarDivideDoubleColumn[17:double](DoubleColAddDoubleScalar[13:double]) + vectorized: true Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:double], IdentityExpression[1:double], IdentityExpression[2:double], IdentityExpression[1:double], IdentityExpression[3:double], IdentityExpression[4:double] + vectorized: true Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index 71e470b..bed0bd1 100644 --- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -1,26 +1,18 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
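The vectorization_limit.q.out hunks that follow replace plain "explain" with the new "explain vectorization" forms this patch introduces. As a hedged HiveQL sketch of the two forms, using queries taken from the hunks themselves (the SET lines are illustrative assumptions; the actual session state comes from the .q files, which are not part of this diff):

    -- Assumed session setup; these property names appear verbatim in the
    -- recorded enabledConditionsMet / nativeConditionsMet entries.
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;

    -- Summary form: prints the PLAN VECTORIZATION header plus per-task
    -- Map Vectorization / Reduce Vectorization blocks.
    EXPLAIN VECTORIZATION
    SELECT cbigint, cdouble FROM alltypesorc
    WHERE cbigint < cdouble AND cint > 0 LIMIT 7;

    -- Detail form: additionally annotates each operator (Filter/Select/
    -- Reduce Sink/Limit/File Sink Vectorization) with the chosen vector
    -- expression classes and their output columns, e.g. IdentityExpression[0:tinyint].
    EXPLAIN VECTORIZATION DETAIL
    SELECT ctinyint, cdouble, csmallint FROM alltypesorc
    WHERE ctinyint IS NOT NULL ORDER BY ctinyint, cdouble LIMIT 20;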
-PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 7 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 7 - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 @@ -40,14 +32,18 @@ POSTHOOK: Input: default@alltypesorc -1887561756 9531.0 PREHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown -explain +explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown -explain +explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -66,31 +62,80 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double], IdentityExpression[1:smallint] + vectorized: true Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: 
Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:double], IdentityExpression[2:smallint] + vectorized: true Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -132,13 +177,17 @@ POSTHOOK: Input: default@alltypesorc -64 -1600.0 -1600 -64 -200.0 -200 PREHOOK: query: -- deduped RS -explain +explain vectorization detail select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- deduped RS -explain +explain vectorization detail select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -159,9 +208,24 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], DoubleColAddDoubleScalar[12:double] + vectorized: true Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(IdentityExpression[12:double]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[0:tinyint] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(IdentityExpression[12:double]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 @@ -174,7 +238,20 @@ STAGE PLANS: TopN Hash 
Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) @@ -228,13 +305,17 @@ NULL 9370.0945309795 -47 -574.6428571428571 -46 3033.55 PREHOOK: query: -- distincts -explain +explain vectorization detail select distinct(ctinyint) from alltypesorc limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- distincts -explain +explain vectorization detail select distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -255,8 +336,21 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 @@ -265,22 +359,60 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + vectorized: true Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint] + native: false + nativeConditionsNotMet: Supported IS false + 
vectorized: true keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -321,12 +453,16 @@ NULL -48 -47 -46 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -347,8 +483,21 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double] + vectorized: true Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -357,27 +506,73 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + vectorized: true Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By 
Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:double] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[1:double]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -419,13 +614,17 @@ NULL 2932 -47 22 -46 24 PREHOOK: query: -- limit zero -explain +explain vectorization detail select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 PREHOOK: type: QUERY POSTHOOK: query: -- limit zero -explain +explain vectorization detail select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -445,13 +644,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### PREHOOK: query: -- 2MR (applied to last RS) -explain +explain vectorization detail select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- 2MR (applied to last RS) -explain +explain vectorization detail select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -471,14 +674,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:double], 
IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[5:double] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -487,14 +710,42 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + vectorized: true Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[1:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:double] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 @@ -502,20 +753,49 @@ STAGE PLANS: Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[1:double], IdentityExpression[0:bigint] + vectorized: true Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out index bacb3bb..27d7738 100644 --- ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out +++ ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out @@ -100,20 +100,24 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@count_case_groupby POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -139,6 +143,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=none + vectorized: false Stage: Stage-0 Fetch Operator @@ -163,20 +173,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
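The surrounding vector_adaptor_usage_mode.q.out hunks contrast the adaptor modes: with hive.vectorized.adaptor.usage.mode=none or =chosen, row-mode UDFs such as regexp_extract leave the map side unvectorized and EXPLAIN records a notVectorizedReason, whereas in a permissive mode the same expressions run through VectorUDFAdaptor (see the selectExpressions and usesVectorUDFAdaptor: true entries further down). A hedged HiveQL sketch of the toggle; the value "all" is an assumption based on Hive's config validator, since the outputs recorded here only exhibit "none" and "chosen":

    -- 'none': never wrap row-mode UDFs; the plan stays row-mode and the
    -- Map Vectorization block reports vectorized: false with a reason.
    SET hive.vectorized.adaptor.usage.mode=none;
    EXPLAIN VECTORIZATION DETAIL
    SELECT c2 REGEXP 'val' FROM varchar_udf_1 LIMIT 1;

    -- 'all' (assumed third mode alongside none/chosen): wrap such UDFs in
    -- VectorUDFAdaptor so the rest of the operator pipeline stays vectorized.
    SET hive.vectorized.adaptor.usage.mode=all;
    EXPLAIN VECTORIZATION DETAIL
    SELECT regexp_extract(c2, 'val_([0-9]+)', 1) FROM varchar_udf_1 LIMIT 1;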
STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -202,6 +216,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_extract (Column[c2], Const string val_([0-9]+), Const int 1) because hive.vectorized.adaptor.usage.mode=none + vectorized: false Stage: Stage-0 Fetch Operator @@ -226,20 +246,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -265,6 +289,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_replace (Column[c2], Const string val, Const string replaced) because hive.vectorized.adaptor.usage.mode=none + vectorized: false Stage: Stage-0 Fetch Operator @@ -289,20 +319,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### replaced_238 replaced_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -328,6 +362,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: 
Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones + vectorized: false Stage: Stage-0 Fetch Operator @@ -352,20 +392,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -380,18 +424,42 @@ STAGE PLANS: Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorUDFAdaptor[4:String], VectorUDFAdaptor[5:String], StringGroupColEqualStringGroupColumn[8:boolean](VectorUDFAdaptor[6:String] VectorUDFAdaptor[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -416,20 +484,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from 
varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -444,18 +516,42 @@ STAGE PLANS: Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorUDFAdaptor[4:String], VectorUDFAdaptor[5:String], StringGroupColEqualStringGroupColumn[8:boolean](VectorUDFAdaptor[6:String] VectorUDFAdaptor[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -480,10 +576,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### replaced_238 replaced_238 true -PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -491,27 +591,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: power(key, 2) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: 
Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=none + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -559,20 +647,24 @@ POSTHOOK: Input: default@decimal_udf 9.8596 9.8596 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -598,6 +690,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=none + vectorized: false Stage: Stage-0 Fetch Operator @@ -622,10 +720,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 -PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -633,27 +735,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: power(key, 2) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because 
hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -701,20 +791,24 @@ POSTHOOK: Input: default@decimal_udf 9.8596 9.8596 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -740,6 +834,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones + vectorized: false Stage: Stage-0 Fetch Operator @@ -764,12 +864,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -797,6 +901,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFWhen(Column[bool], Const int 1, GenericUDFOPNot(Column[bool]), Const int 0, Const void null) because hive.vectorized.adaptor.usage.mode=none + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -831,12 +945,16 @@ key2 1 key3 0 key4 1 key5 0 -PREHOOK: query: 
explain +PREHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -851,9 +969,23 @@ STAGE PLANS: Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], VectorUDFAdaptor[3:Long](NotCol[2:boolean]) + vectorized: true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -862,9 +994,27 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 19f8093..4cfc93d 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: 
query: explain +PREHOOK: query: explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -121,9 +125,23 @@ STAGE PLANS: Select Operator expressions: dc (type: decimal(38,18)) outputColumnNames: dc + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[6:decimal(38,18)] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(dc), max(dc), sum(dc), avg(dc) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(IdentityExpression[6:decimal(38,18)]), VectorUDAFMaxDecimal(IdentityExpression[6:decimal(38,18)]), VectorUDAFSumDecimal(IdentityExpression[6:decimal(38,18)]), VectorUDAFAvgDecimal(IdentityExpression[6:decimal(38,18)]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(IdentityExpression[6:decimal(38,18)]) output type STRUCT requires PRIMITIVE IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +150,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index 3043a6c..cc13830 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -33,10 +33,14 @@ POSTHOOK: Output: default@testvec POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY -POSTHOOK: query: 
explain select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: query: explain vectorization select max(dt), max(greg_dt) from testvec where id=5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -44,46 +48,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: testvec - Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id = 5) (type: boolean) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: dt (type: int), greg_dt (type: string) - outputColumnNames: dt, greg_dt - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(dt), max(greg_dt) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), max(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out index 9837b26..be5a887 100644 --- ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out @@ -43,17 +43,21 @@ POSTHOOK: Output: default@tbl2 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of sub-query. 
It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -66,11 +70,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -78,14 +94,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -120,7 +166,7 @@ POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### 22 PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select key, count(*) from @@ -131,7 +177,7 @@ select count(*) from ) subq2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed as part of more than one sub-query. 
It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select key, count(*) from @@ -141,6 +187,10 @@ select count(*) from group by key ) subq2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -154,11 +204,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -167,7 +229,19 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -175,7 +249,25 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -200,6 +292,14 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -245,7 +345,7 @@ POSTHOOK: Input: default@tbl2 6 PREHOOK: query: -- A 
join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization detail select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -264,7 +364,7 @@ on src1.key = src2.key PREHOOK: type: QUERY POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -explain +explain vectorization detail select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -281,6 +381,10 @@ join ) src2 on src1.key = src2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -295,11 +399,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -308,8 +424,21 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -317,8 +446,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ 
-347,6 +494,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -372,11 +526,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -385,8 +551,21 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -394,8 +573,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -461,7 +658,7 @@ POSTHOOK: Input: default@tbl2 9 1 1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. 
-explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -470,13 +667,17 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -489,11 +690,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -501,14 +714,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -548,7 +791,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- The subquery itself is being joined. 
Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization detail select count(*) from ( select * from @@ -562,7 +805,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query -explain +explain vectorization detail select count(*) from ( select * from @@ -574,6 +817,10 @@ select count(*) from join tbl2 b on subq2.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -586,11 +833,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColLessLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean]) + vectorized: true predicate: ((key < 8) and (key < 6)) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -598,14 +857,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -655,7 +944,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query.
-- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select * from @@ -676,7 +965,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. -- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select * from @@ -695,6 +984,10 @@ select count(*) from ) subq4 on subq2.key = subq4.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -707,11 +1000,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColLessLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean]) + vectorized: true predicate: ((key < 8) and (key < 6)) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -719,14 +1024,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -789,7 +1124,7 @@ POSTHOOK: Input: default@tbl1 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join.
Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization detail select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -799,13 +1134,17 @@ PREHOOK: type: QUERY POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. -explain +explain vectorization detail select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -818,11 +1157,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true predicate: (key < 8) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -830,14 +1181,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -877,7 +1258,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- 
Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side -- join should be performed -explain +explain vectorization detail select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -886,13 +1267,17 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side -- join should be performed -explain +explain vectorization detail select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -932,6 +1317,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Join Operator condition map: @@ -960,6 +1352,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1001,18 +1401,22 @@ POSTHOOK: Input: default@tbl2 22 PREHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join. -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY POSTHOOK: query: -- One of the tables is a sub-query and the other is not. -- It should be converted to a sort-merge join.
-explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1025,11 +1429,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -1037,14 +1453,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1080,7 +1526,7 @@ POSTHOOK: Input: default@tbl2 20 PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. -- It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1092,7 +1538,7 @@ select count(*) from PREHOOK: type: QUERY POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
-- It should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1102,6 +1548,10 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1114,11 +1564,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColLessLongScalar[-1:boolean] + vectorized: true predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -1128,14 +1590,44 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1181,7 +1673,7 @@ POSTHOOK: Input: default@tbl2 56 PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1196,7 +1688,7 @@ on subq2.key = b.key) a PREHOOK: type: QUERY POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
-- The join should be converted to a sort-merge join -explain +explain vectorization detail select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1209,6 +1701,10 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1221,11 +1717,23 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColLessLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean]) + vectorized: true predicate: ((key < 8) and (key < 6)) (type: boolean) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: @@ -1233,14 +1741,44 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1308,7 +1846,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to -- a sort-merge join -explain +explain vectorization detail from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -1317,13 +1855,17 @@ insert overwrite table dest2 select key, val1, val2 PREHOOK: type: QUERY POSTHOOK: query: -- The join is followed by a multi-table insert. 
It should be converted to -- a sort-merge join -explain +explain vectorization detail from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1349,6 +1891,12 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -1358,14 +1906,36 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:string] + vectorized: true Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string] + vectorized: true File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1373,12 +1943,25 @@ STAGE PLANS: name: default.dest1 File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-8 Conditional Operator @@ -1587,7 +2170,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@dest2 PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. 
-- It should be converted to a sort-merge join -explain +explain vectorization detail from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -1596,13 +2179,17 @@ insert overwrite table dest2 select key, count(*) group by key PREHOOK: type: QUERY POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. -- It should be converted to a sort-merge join -explain +explain vectorization detail from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1623,6 +2210,12 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -1632,8 +2225,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1642,8 +2245,22 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -1651,8 +2268,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index 0b9401d..3d74ab0 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -86,7 +90,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:tint - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:tint @@ -104,42 +107,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: tsint - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), csint (type: smallint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint @@ -179,13 +160,17 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 4 3 10 1 NoOk 4 4 10 10 Ok Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -196,7 +181,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:tint - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:tint @@ -214,45 +198,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: tsint - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), csint (type: smallint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) - Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 184 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint diff --git ql/src/test/results/clientpositive/vector_between_in.q.out ql/src/test/results/clientpositive/vector_between_in.q.out index 9f351b2..af5e066 100644 --- ql/src/test/results/clientpositive/vector_between_in.q.out +++ ql/src/test/results/clientpositive/vector_between_in.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -23,45 +27,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdate (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: date) - sort order: + - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE 
cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,49 +59,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink -PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -119,45 +91,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - 
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -119,45 +91,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: cdecimal1 (type: decimal(20,10))
-                outputColumnNames: _col0
-                Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(20,10))
-                  sort order: +
-                  Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: decimal(20,10))
-          outputColumnNames: _col0
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
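Throughout these MR-based plans, Reduce Vectorization reports enabled: false with exactly one condition met and one not: hive.vectorized.execution.reduce.enabled is on, but the mr execution engine is not in [tez, spark]. A hedged sketch of the settings the condition strings name (reduce-side vectorization is expected only once the engine condition clears; not exercised by these golden files):

  SET hive.vectorized.execution.enabled=true;         -- satisfies the PLAN VECTORIZATION condition
  SET hive.vectorized.execution.reduce.enabled=true;  -- the condition already reported as met
  SET hive.execution.engine=tez;                      -- mr is what fails 'hive.execution.engine mr IN [tez, spark]'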
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -165,49 +123,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -215,45 +155,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: cdate (type: date)
-                outputColumnNames: _col0
-                Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: date)
-                  sort order: +
-                  Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: date)
-          outputColumnNames: _col0
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -261,45 +187,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: cdate (type: date)
-                outputColumnNames: _col0
-                Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: date)
-                  sort order: +
-                  Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: date)
-          outputColumnNames: _col0
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -307,45 +219,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: cdecimal1 (type: decimal(20,10))
-                outputColumnNames: _col0
-                Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(20,10))
-                  sort order: +
-                  Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: decimal(20,10))
-          outputColumnNames: _col0
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
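Every Map Vectorization summary above is enabled by the same pair of facts: the scan reads org.apache.hadoop.hive.ql.io.orc.OrcInputFormat and hive.vectorized.use.vectorized.input.format is true. A hedged sketch of the table setup these tests imply (table name and column types are taken from the queries and plans; the actual .q setup file is not part of this diff):

  CREATE TABLE decimal_date_test (cdecimal1 DECIMAL(20,10), cdate DATE)
  STORED AS ORC;  -- ORC provides the vectorized input format the condition asks for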
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -353,44 +251,22 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
 PREHOOK: type: QUERY
@@ -639,12 +515,16 @@ POSTHOOK: Input: default@decimal_date_test
 6172
 PREHOOK: query: -- projections
-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
+EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- projections
-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
+EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -652,52 +532,31 @@ STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean)
-              outputColumnNames: _col0
-              Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: _col0 (type: boolean)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: boolean)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: boolean)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -705,52 +564,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
-              outputColumnNames: _col0
-              Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: _col0 (type: boolean)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: boolean)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: boolean)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -758,52 +596,31 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
-              outputColumnNames: _col0
-              Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: _col0 (type: boolean)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: boolean)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: boolean)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
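The projection variants are the interesting contrast in this file: the same IN and BETWEEN predicates that vectorized without the adaptor inside a Filter Operator (usesVectorUDFAdaptor: false in the filter plans above) report usesVectorUDFAdaptor: true once the predicate is evaluated as a projected boolean column, as in the cdate BETWEEN plan above and the cdecimal1 NOT BETWEEN plan that follows. Side by side, per the plans in this file:

  -- BETWEEN in the WHERE clause: native vector filter, no adaptor
  SELECT cdate FROM decimal_date_test
  WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE);

  -- BETWEEN as a projected value: routed through the VectorUDFAdaptor
  SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) AS c0
  FROM decimal_date_test;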
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -811,47 +628,22 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: decimal_date_test
-            Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
-              outputColumnNames: _col0
-              Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: _col0 (type: boolean)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: boolean)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: boolean)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
index 2110d43..c9a2141 100644
--- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
+++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
@@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:
 POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -115,7 +119,6 @@ STAGE PLANS:
     Map Reduce Local Work
       Alias -> Map Local Tables:
         $hdt$_0:$hdt$_0:t1 
-          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
         $hdt$_0:$hdt$_0:t1 
@@ -136,59 +139,21 @@
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: t2
-            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: bin is not null (type: boolean)
-              Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col10 (type: binary)
-                    1 _col10 (type: binary)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
-                  Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: sum(_col0)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order:
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: bigint)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for SelectColumnIsNotNull argument #0 type name Binary
+          vectorized: false
       Local Work:
        Map Reduce Local Work
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
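This plan shows a whole map task staying in row mode: vectorization is enabled and the input is ORC, but the planner finds no vector type for the IS NOT NULL check on the binary join key, records the notVectorizedReason shown above, and sets vectorized: false. A minimal reproduction of the failing shape, using the query and column from this test:

  -- The equi-join makes Hive generate a 'bin is not null' filter, and
  -- SelectColumnIsNotNull has no vector type for BINARY, so the map stays row-mode.
  SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;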
 PREHOOK: query: SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
@@ -201,16 +166,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
 #### A masked pattern was here ####
 -27832781952
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -218,51 +187,22 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: hundredorc
-            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: bin (type: binary)
-              outputColumnNames: bin
-              Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count()
-                keys: bin (type: binary)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: binary)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: binary)
-                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: binary)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col1 (type: bigint), _col0 (type: binary)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: SELECT count(*), bin
 FROM hundredorc
@@ -304,16 +244,20 @@ POSTHOOK: Input: default@hundredorc
 3 zync studies
 PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
-EXPLAIN
+EXPLAIN VECTORIZATION DETAIL
 SELECT t1.i, t1.bin, t2.bin
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -350,11 +294,23 @@ STAGE PLANS:
           alias: t2
           Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                nativeConditionsMet: Supported IS true
+                predicateExpression: SelectColumnIsNotNull[-1:boolean]
+                vectorized: true
             predicate: i is not null (type: boolean)
            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: i (type: int), bin (type: binary)
              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[2:int], IdentityExpression[10:binary]
+                  vectorized: true
              Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
                condition map:
@@ -362,20 +318,45 @@
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    vectorized: true
                outputColumnNames: _col0, _col1, _col3
                Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
                  outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:int], IdentityExpression[1:binary], IdentityExpression[2:binary]
+                      vectorized: true
                  Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                        nativeConditionsNotMet: Supported IS false
+                        vectorized: true
                    Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
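The DETAIL level adds a Vectorization block under each operator, separating vectorized (runs on vector batches at all) from native (has a specialized vector implementation rather than a row-mode bridge). The Map Join block above meets every native condition except the engine one, so the non-native VectorMapJoinOperator is chosen. A hedged sketch of re-checking this on a DAG engine (assumption: only the engine condition changes; which native join class would then be picked is not shown in this diff):

  SET hive.execution.engine=tez;  -- clears 'hive.execution.engine mr IN [tez, spark] IS false'
  EXPLAIN VECTORIZATION DETAIL
  SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i;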
diff --git ql/src/test/results/clientpositive/vector_bround.q.out ql/src/test/results/clientpositive/vector_bround.q.out
index f92d749..50f3e17 100644
--- ql/src/test/results/clientpositive/vector_bround.q.out
+++ ql/src/test/results/clientpositive/vector_bround.q.out
@@ -34,10 +34,14 @@ POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test_vector_bround
 POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround
+PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround
+POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -45,28 +49,18 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: test_vector_bround
-            Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: bround(v0) (type: double), bround(v1, 1) (type: double)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/vector_bucket.q.out ql/src/test/results/clientpositive/vector_bucket.q.out
index 7a6a4da..f9965df 100644
--- ql/src/test/results/clientpositive/vector_bucket.q.out
+++ ql/src/test/results/clientpositive/vector_bucket.q.out
@@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@non_orc_table
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -33,6 +37,14 @@ STAGE PLANS:
                 Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                 Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: string), _col1 (type: string)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out
index 5cd8085..7133450 100644
--- ql/src/test/results/clientpositive/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/vector_cast_constant.q.out
@@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION SELECT
    i,
    AVG(CAST(50 AS INT)) AS `avg_int_ok`,
    AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
    AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
   FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION SELECT
    i,
    AVG(CAST(50 AS INT)) AS `avg_int_ok`,
    AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
    AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
   FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -119,73 +123,33 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: over1korc
-            Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: i (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: avg(50), avg(50.0), avg(50)
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.1
-                  value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
-          keys: KEY._col0 (type: int)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: false
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
-              value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
-          Limit
-            Number of rows: 10
-            Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: 10
-      Processor Tree:
-        ListSink
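vector_cast_constant.q.out is the one summary in this hunk with groupByVectorOutput: false: the removed Stage-1 plan shows the map-side hash GROUP BY shipping AVG's partial state as struct-typed value expressions, which the vectorized group-by emits as rows rather than vector batches. The aggregate shapes that trigger it, per the test query:

  SELECT i,
         AVG(CAST(50 AS INT))     AS `avg_int_ok`,      -- AVG's partial result is a struct, not a primitive
         AVG(CAST(50 AS DOUBLE))  AS `avg_double_ok`,
         AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
  FROM over1korc GROUP BY i ORDER BY i LIMIT 10;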
 PREHOOK: query: SELECT
    i,
diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out
index dfe545b..cd12146 100644
--- ql/src/test/results/clientpositive/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/vector_char_2.q.out
@@ -47,18 +47,22 @@ val_10 10 1
 val_100 200 2
 val_103 206 2
 val_104 208 2
-PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+PREHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+POSTHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -67,73 +71,33 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: char_2
-            Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: value (type: char(20)), UDFToInteger(key) (type: int)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: sum(_col1), count()
-                keys: _col0 (type: char(20))
-                mode: hash
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: char(20))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: char(20))
-                  Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.1
-                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0), count(VALUE._col1)
-          keys: KEY._col0 (type: char(20))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: char(20))
-              sort order: +
-              Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
-              value expressions: _col1 (type: bigint), _col2 (type: bigint)
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-          Limit
-            Number of rows: 5
-            Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: 5
-      Processor Tree:
-        ListSink
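Two details are worth reading out of this char_2 plan. First, Stage-1 vectorizes but with usesVectorUDFAdaptor: true; the removed operator tree points at the UDFToInteger(key) cast from sum(cast(key as int)). Second, Stage-2 (the ORDER BY ... LIMIT job) cannot vectorize its map because it reads the intermediate SequenceFileInputFormat, and row-deserialized inputs only vectorize when the named knob is on. A hedged sketch (assumption: enabling the knob is what would clear that condition; these golden files keep it off):

  SET hive.vectorized.use.vector.serde.deserialize=true;  -- the condition reported as NotMet for Stage-2
  EXPLAIN VECTORIZATION
  SELECT value, sum(cast(key AS int)), count(*) numrows
  FROM char_2 GROUP BY value ORDER BY value ASC LIMIT 5;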
 PREHOOK: query: -- should match the query from src
 select value, sum(cast(key as int)), count(*) numrows
@@ -179,18 +143,22 @@ val_97 194 2
 val_96 96 1
 val_95 190 2
 val_92 92 1
-PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+PREHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+POSTHOOK: query: explain vectorization select value, sum(cast(key as int)), count(*) numrows
 from char_2
 group by value
 order by value desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -199,73 +167,33 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: char_2
-            Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: value (type: char(20)), UDFToInteger(key) (type: int)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: sum(_col1), count()
-                keys: _col0 (type: char(20))
-                mode: hash
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: char(20))
-                  sort order: -
-                  Map-reduce partition columns: _col0 (type: char(20))
-                  Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.1
-                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
       Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0), count(VALUE._col1)
-          keys: KEY._col0 (type: char(20))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: char(20))
-              sort order: -
-              Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
-              value expressions: _col1 (type: bigint), _col2 (type: bigint)
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-          Limit
-            Number of rows: 5
-            Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false

   Stage: Stage-0
-    Fetch Operator
       limit: 5
-      Processor Tree:
-        ListSink
 PREHOOK: query: -- should match the query from src
 select value, sum(cast(key as int)), count(*) numrows
diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out
index 58988bf..39d28c3 100644
--- ql/src/test/results/clientpositive/vector_char_4.q.out
+++ ql/src/test/results/clientpositive/vector_char_4.q.out
@@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@char_lazy_binary_columnar
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
@@ -147,9 +151,20 @@ STAGE PLANS:
           Select Operator
             expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50))
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                nativeConditionsMet: Supported IS true
+                selectExpressions: CastLongToChar[13:Char], CastLongToChar[14:Char], CastLongToChar[15:Char], CastLongToChar[16:Char], VectorUDFAdaptor[17:char(20)], VectorUDFAdaptor[18:char(20)], CastStringGroupToChar[19:Char]
+                vectorized: true
            Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              File Sink Vectorization:
+                  className: VectorFileSinkOperator
+                  native: false
+                  nativeConditionsNotMet: Supported IS false
+                  vectorized: true
              Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -157,6 +172,14 @@ STAGE PLANS:
                  serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
                  name: default.char_lazy_binary_columnar
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true

   Stage: Stage-7
     Conditional Operator
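The selectExpressions list in this DETAIL output is a compact census of which CHAR casts are native: the integer-family columns go through CastLongToChar and the string column through CastStringGroupToChar, while the float and double casts each appear as VectorUDFAdaptor entries, which is exactly why the stage summary reports usesVectorUDFAdaptor: true. Restated against the projected columns (per the plan above):

  -- t, si, i, b: CastLongToChar (native); f, d: VectorUDFAdaptor; s: CastStringGroupToChar
  SELECT CAST(t AS CHAR(10)), CAST(si AS CHAR(10)), CAST(i AS CHAR(20)),
         CAST(b AS CHAR(30)), CAST(f AS CHAR(20)), CAST(d AS CHAR(20)),
         CAST(s AS CHAR(50))
  FROM vectortab2korc;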
[(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with same length char -explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -140,7 +144,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:a @@ -161,51 +164,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: char(10)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: char(10)) - 1 _col1 (type: char(10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -225,11 +201,15 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with 
different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join char with different length char -explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -240,7 +220,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:a @@ -261,51 +240,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: char(20)) - 1 _col1 (type: char(20)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -327,11 +279,15 @@ POSTHOOK: Input: default@char_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: 
QUERY POSTHOOK: query: -- Join char with string -explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -342,7 +298,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:b @@ -363,51 +318,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: char(10)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToString(_col1) (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out index e1dedae..d038b45 100644 --- ql/src/test/results/clientpositive/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from char_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value 
from char_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,42 +66,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: + - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -139,16 +123,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from char_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from char_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -156,42 +144,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: - - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -236,12 +204,16 @@ create table char_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert into table char_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -257,16 +229,45 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_coalesce.q.out ql/src/test/results/clientpositive/vector_coalesce.q.out index 0101b66..f32b5da 100644 --- ql/src/test/results/clientpositive/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/vector_coalesce.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c @@ -8,12 +8,16 @@ LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +EXPLAIN VECTORIZATION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -21,44 +25,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 1045942 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cdouble is null (type: boolean) - Statistics: Num rows: 3114 Data size: 265164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) - sort order: +++++ - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 246572 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc @@ -86,18 +68,22 @@ NULL NULL -738306196 -51.0 NULL -738306196 NULL NULL -819152895 8.0 NULL -819152895 NULL NULL -827212561 8.0 NULL -827212561 NULL NULL -949587513 11.0 NULL -949587513 -PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -105,44 +91,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ctinyint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 37224 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) - sort order: +++ - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 27928 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc @@ -170,18 +134,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -189,41 +157,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), 0.0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc @@ -251,18 +200,22 @@ NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, ctimestamp2, 
coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -270,44 +223,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) - sort order: +++ - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc @@ -335,18 +266,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat 
IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -354,41 +289,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), null (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc @@ -416,16 +332,20 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -433,34 +353,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: 
COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cbigint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 27912 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: null (type: bigint), ctinyint (type: tinyint), COALESCE(null,ctinyint) (type: tinyint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3115 Data size: 21772 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 56d39d9..320b14d 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -16,18 +16,22 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@str_str_orc POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -96,14 +100,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -147,18 +155,22 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL 
SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -173,9 +185,23 @@ STAGE PLANS: Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], VectorUDFAdaptor[4:Long](VectorCoalesce[3:string](IdentityExpression[0:string] ConstantVectorExpression[2:string])) + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[4:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[1:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -184,9 +210,27 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -228,14 +272,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -250,15 +298,34 @@ STAGE PLANS: Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorCoalesce[3:string](IdentityExpression[0:string] ConstantVectorExpression[2:string]) + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_complex_all.q.out ql/src/test/results/clientpositive/vector_complex_all.q.out index a54a371..d61464f 100644 --- ql/src/test/results/clientpositive/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/vector_complex_all.q.out @@ -62,14 +62,18 @@ POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] orc_create_staging.str orc_create_staging.mp orc_create_staging.lst orc_create_staging.strct PREHOOK: query: -- Since complex types are not supported, this query should not vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM orc_create_complex PREHOOK: type: QUERY POSTHOOK: query: -- Since complex types are not supported, this query should not vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM orc_create_complex POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -92,6 +96,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map of Column[mp] not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -108,18 +118,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### orc_create_complex.str orc_create_complex.mp orc_create_complex.lst orc_create_complex.strct -line1 {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] {"a":"one","b":"two"} -line2 {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"three","b":"four"} -line3 {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] {"a":"five","b":"six"} +line1 {"key13":"value13","key12":"value12","key11":"value11"} ["a","b","c"] {"a":"one","b":"two"} +line2 {"key21":"value21","key23":"value23","key22":"value22"} ["d","e","f"] {"a":"three","b":"four"} +line3 {"key33":"value33","key31":"value31","key32":"value32"} ["g","h","i"] {"a":"five","b":"six"} PREHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize. 
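
Note on the vector_complex_all.q.out hunk above: with vectorization enabled, a plan that cannot vectorize now reports vectorized: false together with a notVectorizedReason (here, the unsupported map type of Column[mp]). A minimal sketch of that first-failure reporting pattern, using hypothetical names (VectorizationNote, SelectValidator, isSupportedType) rather than Hive's actual Vectorizer code:

    import java.util.Arrays;
    import java.util.List;

    class VectorizationNote {
      boolean vectorized = true;
      String notVectorizedReason;

      void fail(String reason) {
        vectorized = false;
        if (notVectorizedReason == null) {
          notVectorizedReason = reason; // EXPLAIN prints a single, first-failure reason
        }
      }
    }

    class SelectValidator {
      // In this sketch, complex type families do not vectorize; primitives do.
      private static boolean isSupportedType(String typeName) {
        return !(typeName.startsWith("map") || typeName.startsWith("array")
            || typeName.startsWith("struct") || typeName.startsWith("uniontype"));
      }

      static void validateSelect(List<String> cols, List<String> types, VectorizationNote note) {
        for (int i = 0; i < types.size(); i++) {
          if (!isSupportedType(types.get(i))) {
            note.fail("Select expression for SELECT operator: Data type " + types.get(i)
                + " of Column[" + cols.get(i) + "] not supported");
            return;
          }
        }
      }

      public static void main(String[] args) {
        VectorizationNote note = new VectorizationNote();
        validateSelect(Arrays.asList("str", "mp"), Arrays.asList("string", "map"), note);
        // Prints a reason shaped like the notVectorizedReason in the output above.
        System.out.println(note.vectorized + " | " + note.notVectorizedReason);
      }
    }

Separately, the SELECT * golden rows above change only in map key order; each row still carries the same key/value pairs.
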
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COUNT(*) FROM orc_create_complex PREHOOK: type: QUERY POSTHOOK: query: -- However, since this query is not referencing the complex fields, it should vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT COUNT(*) FROM orc_create_complex POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -132,17 +146,47 @@ STAGE PLANS: alias: orc_create_complex Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -174,14 +218,18 @@ POSTHOOK: Input: default@orc_create_complex c0 3 PREHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str FROM orc_create_complex ORDER BY str PREHOOK: type: QUERY POSTHOOK: query: -- Also, since this query is not referencing the complex fields, it should vectorize. 
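
Note on the COUNT(*) hunk above: the query vectorizes despite the table's map/list/struct columns because count(*) reads no column vector, which is what the VectorUDAFCountStar aggregator in the Group By Vectorization summary suggests. A toy illustration of that idea (not Hive's VectorUDAFCountStar itself), assuming the aggregator is fed only a per-batch row count:

    class CountStarSketch {
      private long count;

      // Called once per incoming batch; only the live row count matters,
      // so complex-typed columns never need to be decoded at all.
      void aggregateBatch(int batchSize) {
        count += batchSize;
      }

      long result() {
        return count;
      }

      public static void main(String[] args) {
        CountStarSketch agg = new CountStarSketch();
        agg.aggregateBatch(3); // e.g. the 3 rows of orc_create_complex
        System.out.println(agg.result()); // 3, matching the "c0 3" result above
      }
    }
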
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT str FROM orc_create_complex ORDER BY str POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -196,12 +244,36 @@ STAGE PLANS: Select Operator expressions: str (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out index 002cdeb..c9f7ea7 100644 --- ql/src/test/results/clientpositive/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -21,13 +21,17 @@ POSTHOOK: Output: default@test POSTHOOK: Lineage: test.a SIMPLE [] POSTHOOK: Lineage: test.b EXPRESSION [] c0 c1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from alltypesorc join test where alltypesorc.cint=test.a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from alltypesorc join test where alltypesorc.cint=test.a POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -85,6 +89,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported + vectorized: false Local Work: Map Reduce Local Work @@ -142,13 +152,17 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test2b POSTHOOK: Lineage: test2b.a EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test2b join test2a on test2b.a = test2a.a[1] PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test2b join test2a on test2b.a = test2a.a[1] POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -202,6 +216,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: UDF GenericUDFIndex(Column[a], Const int 1) not supported + vectorized: false Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_const.q.out ql/src/test/results/clientpositive/vector_const.q.out index cdb86fa..4933bf1 100644 --- ql/src/test/results/clientpositive/vector_const.q.out +++ ql/src/test/results/clientpositive/vector_const.q.out @@ -15,12 +15,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@varchar_const_1 POSTHOOK: Lineage: varchar_const_1.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -35,15 +39,34 @@ STAGE PLANS: Select Operator expressions: 'FF' (type: varchar(4)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[1:varchar(4)] + vectorized: true Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_count.q.out 
ql/src/test/results/clientpositive/vector_count.q.out index 734ef39..0123350 100644 --- ql/src/test/results/clientpositive/vector_count.q.out +++ ql/src/test/results/clientpositive/vector_count.q.out @@ -47,10 +47,14 @@ POSTHOOK: Input: default@abcd 12 100 75 7 12 NULL 80 2 NULL 35 23 6 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -58,47 +62,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: abcd - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: a, b, c, d - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) - keys: a (type: int), b (type: int), c (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY @@ -112,10 +91,14 @@ POSTHOOK: Input: default@abcd 100 1 1 3 12 1 2 9 NULL 1 1 6 -PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), 
count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY -POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -123,45 +106,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: abcd - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) - keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: ++++ - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized - Reduce 
Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY @@ -172,10 +132,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 -PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY -POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: query: explain vectorization select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -183,41 +147,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - 
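The Reduce Vectorization block above reports enabled: false because reduce-side vectorization additionally requires Tez or Spark, and this run uses MR; each named precondition is filed under enableConditionsMet or enableConditionsNotMet. A rough sketch of how such a report can be assembled, with invented names (ConditionReportDemo is not Hive code):

    import java.util.ArrayList;
    import java.util.List;

    // Toy sketch of deriving an explain-style Met/NotMet split from named
    // preconditions. Invented names; not Hive's actual implementation.
    public class ConditionReportDemo {
      record Condition(String description, boolean holds) {}

      public static void main(String[] args) {
        List<Condition> conds = List.of(
            new Condition("hive.vectorized.execution.reduce.enabled IS true", true),
            new Condition("hive.execution.engine mr IN [tez, spark]", false));
        List<String> met = new ArrayList<>(), notMet = new ArrayList<>();
        for (Condition c : conds) {
          (c.holds() ? met : notMet).add(c.description() + " IS " + c.holds());
        }
        boolean enabled = notMet.isEmpty(); // vectorize only if every condition holds
        System.out.println("enabled: " + enabled);
        System.out.println("enableConditionsMet: " + String.join(", ", met));
        System.out.println("enableConditionsNotMet: " + String.join(", ", notMet));
      }
    }
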
alias: abcd - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: a, b, c, d - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int), b (type: int), c (type: int) - sort order: +++ - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: d (type: int) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a PREHOOK: type: QUERY @@ -231,10 +176,14 @@ POSTHOOK: Input: default@abcd 100 1 1 3 12 1 2 9 NULL 1 1 6 -PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY -POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: query: explain vectorization select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), 
count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -242,38 +191,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: abcd - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) - sort order: ++++ - Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct 
a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_count_distinct.q.out ql/src/test/results/clientpositive/vector_count_distinct.q.out index 22ab392..8bfd7b6 100644 --- ql/src/test/results/clientpositive/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/vector_count_distinct.q.out @@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ] PREHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales PREHOOK: type: QUERY POSTHOOK: query: ------------------------------------------------------------------------------------------ -explain +explain vectorization detail select count(distinct ws_order_number) from web_sales POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1253,9 +1257,23 @@ STAGE PLANS: Select Operator expressions: ws_order_number (type: int) outputColumnNames: ws_order_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[16:int] + vectorized: true Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT ws_order_number) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[16:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[16:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -1263,8 +1281,26 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false + vectorized: true Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) diff --git ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out index 
f639342..e51c0e4 100644 --- ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out +++ ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out @@ -22,12 +22,16 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@testorc1 POSTHOOK: Lineage: testorc1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: testorc1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select id, UDFHelloTest(name) from testorc1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select id, UDFHelloTest(name) from testorc1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -42,15 +46,34 @@ STAGE PLANS: Select Operator expressions: id (type: int), Hello... (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], VectorUDFAdaptor[2:String] + vectorized: true Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out index 804d9e6..cd36f1f 100644 --- ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: 
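The usesVectorUDFAdaptor: true flag and the VectorUDFAdaptor entry above mark a custom UDF (UDFHelloTest) that has no vectorized implementation, so it is wrapped and invoked row by row inside the batch while the surrounding operators stay vectorized. A toy sketch of that adaptor idea, with invented names (UdfAdaptorDemo is not Hive's VectorUDFAdaptor):

    import java.util.function.Function;

    // Toy sketch of the adaptor idea: a scalar (row-at-a-time) UDF is
    // wrapped so it can run inside a column batch. Invented names.
    public class UdfAdaptorDemo {
      // Stand-in for a user's row-level UDF, like UDFHelloTest.
      static String helloUdf(String name) { return "Hello " + name; }

      // The "adaptor": applies the scalar function to every row of the
      // batch. The batch pipeline is vectorized; the UDF itself is not.
      static String[] adapt(Function<String, String> udf, String[] column) {
        String[] out = new String[column.length];
        for (int i = 0; i < column.length; i++) out[i] = udf.apply(column[i]);
        return out;
      }

      public static void main(String[] args) {
        for (String s : adapt(UdfAdaptorDemo::helloUdf, new String[] {"a", "b"}))
          System.out.println(s);
      }
    }
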
[hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -108,41 +112,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) - sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: 20 - Processor Tree: - ListSink PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY @@ -183,10 +155,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -17045922556 -PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +PREHOOK: query: EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 +POSTHOOK: query: EXPLAIN VECTORIZATION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -194,42 +170,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t 
(type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) - sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 20 - Processor Tree: - ListSink PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_date_1.q.out ql/src/test/results/clientpositive/vector_date_1.q.out index df3efa7..ade241d 100644 --- ql/src/test/results/clientpositive/vector_date_1.q.out +++ ql/src/test/results/clientpositive/vector_date_1.q.out @@ -47,7 +47,7 @@ POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] PREHOOK: query: -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -62,7 +62,7 @@ select from vector_date_1 order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -76,6 +76,10 @@ select dt2 > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -90,13 +94,37 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], LongColEqualLongColumn[2:long], LongColNotEqualLongColumn[3:long], LongColLessEqualLongColumn[4:long], LongColLessEqualLongColumn[5:long], LongColLessLongColumn[6:long], LongColGreaterEqualLongColumn[7:long], LongColGreaterEqualLongColumn[8:long], LongColGreaterLongColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) @@ -149,7 +177,7 @@ POSTHOOK: Input: default@vector_date_1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 true true true true true true true true 2001-01-01 2001-06-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -163,7 +191,7 @@ select dt2 < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -177,6 +205,10 @@ select dt2 < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
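The LongColEqualLongColumn style names above reflect that date columns are stored as epoch days in long vectors, so column-to-column date comparisons reuse the generic long comparison templates, while the literal forms later in this file get Date-specific scalar variants. A minimal sketch of the column-to-column case, with invented names and the two non-NULL rows from this test:

    import java.time.LocalDate;

    // Toy sketch of why date comparisons appear as LongCol*LongColumn:
    // dates live in long vectors as epoch days, so col-vs-col comparison
    // is plain long comparison writing 0/1 into an output long vector.
    // Invented names; not Hive's generated expression classes.
    public class DateColCompareDemo {
      static void longColLessLongColumn(long[] a, long[] b, long[] out, int n) {
        for (int i = 0; i < n; i++) out[i] = a[i] < b[i] ? 1 : 0;
      }

      public static void main(String[] args) {
        long[] dt1 = {LocalDate.parse("1999-12-31").toEpochDay(),
                      LocalDate.parse("2001-01-01").toEpochDay()};
        long[] dt2 = {LocalDate.parse("2000-01-01").toEpochDay(),
                      LocalDate.parse("2001-06-01").toEpochDay()};
        long[] out = new long[2];
        longColLessLongColumn(dt1, dt2, out, 2); // dt1 < dt2
        System.out.println(out[0] + " " + out[1]); // 1 1 (both true)
      }
    }
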
Stage-1 @@ -191,13 +223,37 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date], LongColNotEqualLongColumn[2:long], LongColEqualLongColumn[3:long], LongColLessLongColumn[4:long], LongColGreaterEqualLongColumn[5:long], LongColGreaterLongColumn[6:long], LongColGreaterLongColumn[7:long], LongColLessEqualLongColumn[8:long], LongColLessLongColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) @@ -251,7 +307,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 false false false false false false false false 2001-01-01 2001-06-01 false false false false false false false false PREHOOK: query: -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -266,7 +322,7 @@ select from vector_date_1 order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -280,6 +336,10 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ 
-294,13 +354,37 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColNotEqualDateScalar[2:long], DateColGreaterEqualDateScalar[3:long], DateColGreaterDateScalar[4:long], DateColLessEqualDateScalar[5:long], DateColLessDateScalar[6:long], DateScalarNotEqualDateColumn[7:long], DateScalarLessEqualDateColumn[8:long], DateScalarLessDateColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) @@ -353,7 +437,7 @@ POSTHOOK: Input: default@vector_date_1 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 true true true true true true true true 2001-01-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -367,7 +451,7 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -381,6 +465,10 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -395,13 +483,37 @@ STAGE PLANS: Select Operator expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), 
(dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColEqualDateScalar[2:long], DateColLessEqualDateScalar[3:long], DateColLessDateScalar[4:long], DateColGreaterEqualDateScalar[5:long], DateColGreaterDateScalar[6:long], DateScalarEqualDateColumn[7:long], DateScalarGreaterEqualDateColumn[8:long], DateScalarGreaterDateColumn[9:long] + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) @@ -456,7 +568,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 false false false false false false false false PREHOOK: query: -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -471,7 +583,7 @@ order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -484,6 +596,10 @@ where and dt2 >= dt1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -496,18 +612,48 @@ STAGE PLANS: alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + 
nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean] FilterLongColNotEqualLongColumn[-1:boolean] FilterLongColLessLongColumn[-1:boolean] FilterLongColLessEqualLongColumn[-1:boolean] FilterLongColGreaterLongColumn[-1:boolean] FilterLongColGreaterEqualLongColumn[-1:boolean]) + vectorized: true predicate: ((dt1 = dt1) and (dt1 <> dt2) and (dt1 < dt2) and (dt1 <= dt2) and (dt2 > dt1) and (dt2 >= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt1 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) @@ -559,7 +705,7 @@ POSTHOOK: Input: default@vector_date_1 2001-01-01 2001-06-01 PREHOOK: query: -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -576,7 +722,7 @@ order by dt1 PREHOOK: type: QUERY POSTHOOK: query: -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -591,6 +737,10 @@ where and date '1970-01-01' <= dt1 order by dt1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -603,17 +753,47 @@ STAGE PLANS: alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDateColEqualDateScalar[-1:boolean] FilterDateScalarEqualDateColumn[-1:boolean] FilterDateColNotEqualDateScalar[-1:boolean] FilterDateScalarNotEqualDateColumn[-1:boolean] FilterDateColGreaterDateScalar[-1:boolean] 
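The FilterExprAndExpr entry above composes child filter expressions that successively narrow a selection vector of surviving row indices, rather than materializing a boolean column per conjunct. A toy sketch of one such child filter under that assumption, with invented names (SelectionVectorDemo is not Hive's VectorFilterOperator):

    // Toy sketch of filter vectorization: an AND of filters narrows a
    // selection vector in place, and each child only scans survivors.
    // Invented names; not Hive code.
    public class SelectionVectorDemo {
      // Keep only selected rows where col[i] != scalar; returns new count.
      static int filterNotEqualScalar(long[] col, long scalar, int[] sel, int n) {
        int k = 0;
        for (int j = 0; j < n; j++) {
          int i = sel[j];
          if (col[i] != scalar) sel[k++] = i; // compact indices in place
        }
        return k;
      }

      public static void main(String[] args) {
        long[] col = {10956, 11323, 10956}; // epoch-day values
        int[] sel = {0, 1, 2};              // all rows initially selected
        int n = filterNotEqualScalar(col, 10956, sel, 3); // like dt1 <> scalar
        System.out.println("rows left: " + n); // 1 (only index 1 survives)
      }
    }
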
FilterDateColGreaterEqualDateScalar[-1:boolean] FilterDateScalarLessDateColumn[-1:boolean] FilterDateScalarLessEqualDateColumn[-1:boolean]) + vectorized: true predicate: ((dt1 = 2001-01-01) and (2001-01-01 = dt1) and (dt1 <> 1970-01-01) and (1970-01-01 <> dt1) and (dt1 > 1970-01-01) and (dt1 >= 1970-01-01) and (1970-01-01 < dt1) and (1970-01-01 <= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt2 (type: date) outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:date] + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 2001-01-01 (type: date), VALUE._col0 (type: date) diff --git ql/src/test/results/clientpositive/vector_decimal_1.q.out ql/src/test/results/clientpositive/vector_decimal_1.q.out index 84c20c9..9fb701d 100644 --- ql/src/test/results/clientpositive/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/vector_decimal_1.q.out @@ -32,12 +32,16 @@ POSTHOOK: Output: default@decimal_1 POSTHOOK: Lineage: decimal_1.t EXPRESSION [] POSTHOOK: Lineage: decimal_1.u EXPRESSION [] POSTHOOK: Lineage: decimal_1.v EXPRESSION [] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -52,12 +56,36 @@ STAGE PLANS: Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToBoolean[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) @@ -86,12 +114,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,12 +138,36 @@ STAGE PLANS: Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) @@ -140,12 +196,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -160,12 +220,36 @@ STAGE PLANS: Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) @@ -194,12 +278,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as int) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as int) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -214,12 +302,36 @@ STAGE PLANS: Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
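The casts in this file fall into three families: every integral width shares CastDecimalToLong because long vectors back all integer types, float and double share CastDecimalToDouble, and boolean gets CastDecimalToBoolean. A small sketch of the three conversions applied to the test value 17.29, using plain java.math.BigDecimal rather than Hive's decimal vectors:

    import java.math.BigDecimal;

    // Toy illustration of the three decimal cast families seen above,
    // applied to decimal_1.t = 17.29. Not Hive's cast expressions.
    public class DecimalCastDemo {
      public static void main(String[] args) {
        BigDecimal t = new BigDecimal("17.29");
        long asLong = t.longValue();         // -> 17   (tinyint..bigint)
        double asDouble = t.doubleValue();   // -> 17.29 (float/double)
        boolean asBoolean = t.signum() != 0; // -> true  (nonzero is true)
        System.out.println(asLong + " " + asDouble + " " + asBoolean);
      }
    }

These match the query results shown in this file: 17 for the integer casts, 17.29 for float and double, and true for boolean.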
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -248,12 +360,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -268,12 +384,36 @@ STAGE PLANS: Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToLong[3:long] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) @@ -302,12 +442,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### 17 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(t as float) from decimal_1 order by t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(t as float) from decimal_1 order by t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -322,12 +466,36 @@ STAGE PLANS: Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastDecimalToDouble[3:double] + vectorized: true Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + + Reduce 
@@ -302,12 +442,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_1
 #### A masked pattern was here ####
 17
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as float) from decimal_1 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as float) from decimal_1 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -322,12 +466,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToFloat(t) (type: float)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToDouble[3:double]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: float)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: float)
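
Every Reduce Sink Vectorization block in these plans reports native: false for the same reason: the clientpositive tests run on plain MapReduce, so the check hive.execution.engine mr IN [tez, spark] fails, and the single ORDER BY key gives no Uniform Hash distribution. A plausible shape for this kind of gate, with hypothetical helper names rather than Hive's actual implementation, keeps both lists so EXPLAIN can print met and unmet conditions side by side:

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch of the bookkeeping behind nativeConditionsMet /
    // nativeConditionsNotMet: each condition is recorded with its outcome,
    // and native execution requires every one of them to hold.
    public class VectorizationConditions {
      private final List<String> met = new ArrayList<>();
      private final List<String> notMet = new ArrayList<>();

      public void record(String description, boolean holds) {
        (holds ? met : notMet).add(description + " IS " + holds);
      }

      public boolean allMet() {
        return notMet.isEmpty();
      }

      public static void main(String[] args) {
        VectorizationConditions c = new VectorizationConditions();
        String engine = "mr"; // these golden files were produced on MapReduce
        c.record("hive.vectorized.execution.reducesink.new.enabled", true);
        c.record("hive.execution.engine " + engine + " IN [tez, spark]",
            engine.equals("tez") || engine.equals("spark"));
        System.out.println("native: " + c.allMet()); // native: false
        System.out.println("notMet: " + c.notMet);
      }
    }
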
@@ -356,12 +524,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_1
 #### A masked pattern was here ####
 17.29
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as double) from decimal_1 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as double) from decimal_1 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -376,12 +548,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToDouble(t) (type: double)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToDouble[3:double]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: double)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: double)
@@ -410,12 +606,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_1
 #### A masked pattern was here ####
 17.29
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as string) from decimal_1 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as string) from decimal_1 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -430,12 +630,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToString(t) (type: string)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToString[3:String]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -464,12 +688,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_1
 #### A masked pattern was here ####
 17.29
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as timestamp) from decimal_1 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as timestamp) from decimal_1 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -484,12 +712,36 @@ STAGE PLANS:
             Select Operator
               expressions: CAST( t AS TIMESTAMP) (type: timestamp)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToTimestamp[3:timestamp]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: timestamp)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp)
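
The Map Vectorization block repeats across all of these plans because the gate is per-input, not per-query: the map side can vectorize only when hive.vectorized.use.vectorized.input.format is on and every input format of the work supports the vectorized read path, which ORC does. A minimal sketch of the input-format half of that check, using Hive's marker interface with a hypothetical helper name:

    import org.apache.hadoop.hive.ql.io.VectorizedInputFormatInterface;
    import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;

    // Hypothetical sketch: an input format can feed VectorizedRowBatches to the
    // operator tree only if it implements the vectorized marker interface,
    // which is why the plans report inputFileFormats: ...orc.OrcInputFormat.
    public class SketchMapVectorizationCheck {
      static boolean supportsVectorizedInput(Class<?> inputFormatClass) {
        return VectorizedInputFormatInterface.class.isAssignableFrom(inputFormatClass);
      }

      public static void main(String[] args) {
        System.out.println("enabled: " + supportsVectorizedInput(OrcInputFormat.class)); // true
      }
    }
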
diff --git ql/src/test/results/clientpositive/vector_decimal_10_0.q.out ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
index 2ee396b..0e42a2d 100644
--- ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_10_0.q.out
@@ -33,12 +33,16 @@ POSTHOOK: Input: default@decimal_txt
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@DECIMAL
 POSTHOOK: Lineage: decimal.dec SIMPLE [(decimal_txt)decimal_txt.FieldSchema(name:dec, type:decimal(10,0), comment:null), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT dec FROM `DECIMAL` order by dec
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT dec FROM `DECIMAL` order by dec
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -53,12 +57,36 @@ STAGE PLANS:
             Select Operator
               expressions: dec (type: decimal(10,0))
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:decimal(10,0)]
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: decimal(10,0))
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: decimal(10,0))
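
vector_decimal_10_0.q.out is the degenerate case: the projected column is the stored decimal itself, so the plan records selectExpressions: IdentityExpression[0:decimal(10,0)], a pass-through whose "output" is simply input column 0. A trivial sketch of such an expression, simplified from the idea behind Hive's IdentityExpression:

    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    // Simplified sketch of an identity (pass-through) vector expression:
    // nothing is computed or copied; downstream operators just read the
    // input column directly, as IdentityExpression[0:decimal(10,0)] indicates.
    public class SketchIdentityExpression {
      private final int columnNum;

      public SketchIdentityExpression(int columnNum) {
        this.columnNum = columnNum;
      }

      public void evaluate(VectorizedRowBatch batch) {
        // no-op by design
      }

      public int getOutputColumn() {
        return columnNum;
      }
    }
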
diff --git ql/src/test/results/clientpositive/vector_decimal_2.q.out ql/src/test/results/clientpositive/vector_decimal_2.q.out
index 5cad43a..de6ea23 100644
--- ql/src/test/results/clientpositive/vector_decimal_2.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_2.q.out
@@ -21,12 +21,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@decimal_2
 POSTHOOK: Lineage: decimal_2.t EXPRESSION []
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as boolean) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as boolean) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -41,12 +45,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToBoolean(t) (type: boolean)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToBoolean[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: boolean)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: boolean)
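
For the boolean cast the planner emits CastDecimalToBoolean[1:long]: vectorized booleans are stored as 0/1 in a LongColumnVector, so the cast reduces to a zero test per row. The scalar rule, sketched with Hive's decimal type (the real kernel applies it batch-wise, as in the earlier sketch):

    import org.apache.hadoop.hive.common.type.HiveDecimal;

    // Scalar semantics behind CastDecimalToBoolean[1:long]: an exactly-zero
    // decimal becomes false (0), anything else becomes true (1).
    public class SketchDecimalToBoolean {
      static long toBooleanBit(HiveDecimal d) {
        return d.signum() == 0 ? 0L : 1L;
      }

      public static void main(String[] args) {
        System.out.println(toBooleanBit(HiveDecimal.create("17.29"))); // 1 -> true
        System.out.println(toBooleanBit(HiveDecimal.create("0")));     // 0 -> false
      }
    }
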
@@ -75,12 +103,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as tinyint) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as tinyint) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -95,12 +127,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToByte(t) (type: tinyint)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: tinyint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: tinyint)
@@ -129,12 +185,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 17
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as smallint) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as smallint) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -149,12 +209,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToShort(t) (type: smallint)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: smallint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: smallint)
@@ -183,12 +267,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 17
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as int) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as int) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -203,12 +291,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToInteger(t) (type: int)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -237,12 +349,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 17
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as bigint) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as bigint) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -257,12 +373,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToLong(t) (type: bigint)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: bigint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint)
@@ -291,12 +431,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 17
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as float) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as float) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -311,12 +455,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToFloat(t) (type: float)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToDouble[1:double]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: float)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: float)
@@ -345,12 +513,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 17.29
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as double) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as double) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -365,12 +537,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToDouble(t) (type: double)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToDouble[1:double]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: double)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: double)
@@ -399,12 +595,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 17.29
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as string) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as string) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -419,12 +619,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToString(t) (type: string)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToString[1:String]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -464,12 +688,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@decimal_2
 POSTHOOK: Lineage: decimal_2.t EXPRESSION []
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as boolean) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as boolean) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -484,12 +712,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToBoolean(t) (type: boolean)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToBoolean[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: boolean)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: boolean)
@@ -518,12 +770,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as tinyint) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as tinyint) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -538,12 +794,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToByte(t) (type: tinyint)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: tinyint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: tinyint)
@@ -572,12 +852,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 13
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as smallint) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as smallint) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -592,12 +876,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToShort(t) (type: smallint)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: smallint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: smallint)
@@ -626,12 +934,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 -3827
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as int) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as int) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -646,12 +958,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToInteger(t) (type: int)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -680,12 +1016,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3404045
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as bigint) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as bigint) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -700,12 +1040,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToLong(t) (type: bigint)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToLong[1:long]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: bigint)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: bigint)
@@ -734,12 +1098,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3404045
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as float) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as float) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -754,12 +1122,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToFloat(t) (type: float)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToDouble[1:double]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: float)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: float)
@@ -788,12 +1180,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3404045.5
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as double) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as double) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -808,12 +1204,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToDouble(t) (type: double)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToDouble[1:double]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: double)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: double)
@@ -842,12 +1262,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3404045.5044003
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(t as string) from decimal_2 order by t
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(t as string) from decimal_2 order by t
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -862,12 +1286,36 @@ STAGE PLANS:
             Select Operator
               expressions: UDFToString(t) (type: string)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: CastDecimalToString[1:String]
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -896,12 +1344,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3404045.5044003
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(3.14 as decimal(4,2)) as c from decimal_2 order by c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(3.14 as decimal(4,2)) as c from decimal_2 order by c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -914,11 +1366,34 @@ STAGE PLANS:
             alias: decimal_2
             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: 3.14 (type: decimal(4,2))
@@ -947,12 +1422,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3.14
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(cast(3.14 as float) as decimal(4,2)) as c from decimal_2 order by c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(cast(3.14 as float) as decimal(4,2)) as c from decimal_2 order by c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -965,11 +1444,34 @@ STAGE PLANS:
             alias: decimal_2
             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: 3.14 (type: decimal(4,2))
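
Note what is absent from the two constant hunks above: the Select Vectorization block has no selectExpressions entry, and the reduce side projects the literal 3.14 (type: decimal(4,2)) directly. Casting a literal is folded at plan time, so at run time the column only needs to be populated once per batch. A rough sketch of that idea, loosely modeled on a constant vector expression and not Hive's actual class:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    // Rough sketch of a constant decimal expression: the folded literal is
    // written once and the column is marked isRepeating, so every row of the
    // batch reads the same 3.14 without any per-row work.
    public class SketchConstantDecimal {
      private final HiveDecimal value;   // e.g. HiveDecimal.create("3.14")
      private final int outputColumn;

      public SketchConstantDecimal(HiveDecimal value, int outputColumn) {
        this.value = value;
        this.outputColumn = outputColumn;
      }

      public void evaluate(VectorizedRowBatch batch) {
        DecimalColumnVector out = (DecimalColumnVector) batch.cols[outputColumn];
        out.isRepeating = true; // slot 0 stands for all batch.size rows
        out.noNulls = true;
        out.isNull[0] = false;
        out.vector[0].set(value);
      }
    }
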
@@ -998,12 +1500,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 3.14
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(cast('2012-12-19 11:12:19.1234567' as timestamp) as decimal(30,8)) as c from decimal_2 order by c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(cast('2012-12-19 11:12:19.1234567' as timestamp) as decimal(30,8)) as c from decimal_2 order by c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1016,11 +1522,34 @@ STAGE PLANS:
             alias: decimal_2
             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: 1355944339.1234567 (type: decimal(30,8))
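
The same folding handles the nested cast above: '2012-12-19 11:12:19.1234567' is parsed to a timestamp and converted to decimal(30,8) while planning, which is why the plan projects the literal 1355944339.1234567, seconds since the epoch with the fractional second preserved (the result row prints as 1355944339.12345670 once padded to scale 8). A hedged sketch of the numeric rule, assuming the timestamp is already split into epoch seconds and nanoseconds:

    import java.math.BigDecimal;

    // Sketch of the timestamp -> decimal conversion the folded literal reflects:
    // epoch seconds plus the nanosecond part scaled by 10^-9.
    public class SketchTimestampToDecimal {
      static BigDecimal toDecimal(long epochSeconds, int nanos) {
        return BigDecimal.valueOf(epochSeconds).add(BigDecimal.valueOf(nanos, 9));
      }

      public static void main(String[] args) {
        // 2012-12-19 11:12:19.1234567 in the test timezone: 1355944339s + 123456700ns
        System.out.println(toDecimal(1355944339L, 123456700));
        // prints 1355944339.123456700, rendered as 1355944339.12345670 at scale 8
      }
    }
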
@@ -1049,12 +1578,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 1355944339.12345670
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(true as decimal) as c from decimal_2 order by c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(true as decimal) as c from decimal_2 order by c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1067,11 +1600,34 @@ STAGE PLANS:
             alias: decimal_2
             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: 1 (type: decimal(10,0))
@@ -1091,12 +1647,16 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(true as decimal) as c from decimal_2 order by c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(true as decimal) as c from decimal_2 order by c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1109,11 +1669,34 @@ STAGE PLANS:
             alias: decimal_2
             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: 1 (type: decimal(10,0))
@@ -1142,12 +1725,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_2
 #### A masked pattern was here ####
 1
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select cast(3Y as decimal) as c from decimal_2 order by c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select cast(3Y as decimal) as c from decimal_2 order by c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1160,11 +1747,34 @@ STAGE PLANS:
             alias: decimal_2
             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: 3 (type: decimal(10,0))
here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(cast(3 as int) as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(cast(3 as int) as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1262,11 +1903,34 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 3 (type: decimal(10,0)) @@ -1295,12 +1959,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(3L as decimal) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(3L as decimal) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1313,11 +1981,34 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic 
stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 3 (type: decimal(10,0)) @@ -1346,12 +2037,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast(0.99999999999999999999 as decimal(20,19)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast(0.99999999999999999999 as decimal(20,19)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1364,11 +2059,34 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 1 (type: decimal(20,19)) @@ -1397,12 +2115,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_2 #### A masked pattern was here #### 1.0000000000000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select cast('0.99999999999999999999' as decimal(20,20)) as c from decimal_2 order by c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select cast('0.99999999999999999999' as decimal(20,20)) as c from decimal_2 order by c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1415,11 +2137,34 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data 
size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 0.99999999999999999999 (type: decimal(20,20)) diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index b022435..78a96c4 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem PREHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby @@ -33,13 +33,17 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- First only do simple aggregations that output primitives only -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -47,54 +51,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - outputColumnNames: cint, cdecimal1, cdecimal2 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() - keys: cint (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - 
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -123,7 +95,7 @@ POSTHOOK: Input: default@decimal_vgby 762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 PREHOOK: query: -- Now add the others... -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby @@ -131,13 +103,17 @@ EXPLAIN SELECT cint, HAVING COUNT(*) > 1 PREHOOK: type: QUERY POSTHOOK: query: -- Now add the others... 
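-- Illustrative sketch (not part of the golden output; decimal_vgby is the
-- table created earlier in this test): the first aggregation above reports
-- groupByVectorOutput: true, while the variant below that adds
-- AVG/STDDEV_POP/STDDEV_SAMP reports groupByVectorOutput: false, apparently
-- matching the struct-typed partial results visible in the removed plan
-- lines. Two minimal probes of that difference:
EXPLAIN VECTORIZATION SELECT cint, SUM(cdecimal1) FROM decimal_vgby GROUP BY cint;
EXPLAIN VECTORIZATION SELECT cint, AVG(cdecimal1) FROM decimal_vgby GROUP BY cint;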
-EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) FROM decimal_vgby GROUP BY cint HAVING COUNT(*) > 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,54 +121,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - outputColumnNames: cint, cdecimal1, cdecimal2 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() - keys: cint (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), diff --git ql/src/test/results/clientpositive/vector_decimal_cast.q.out ql/src/test/results/clientpositive/vector_decimal_cast.q.out index aee5e02..6ac463d 100644 --- ql/src/test/results/clientpositive/vector_decimal_cast.q.out +++ ql/src/test/results/clientpositive/vector_decimal_cast.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9,34 +13,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, cint, cboolean1, ctimestamp1, CAST(cdouble AS DECIMAL(20,10)), CAST(cint AS DECIMAL(23,14)), CAST(cboolean1 AS DECIMAL(5,2)), CAST(ctimestamp1 AS DECIMAL(15,0)) FROM alltypesorc WHERE cdouble IS NOT NULL AND cint IS NOT NULL AND cboolean1 IS NOT NULL AND ctimestamp1 IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/vector_decimal_expressions.q.out index 8af5a1c..086a88e 100644 --- ql/src/test/results/clientpositive/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/vector_decimal_expressions.q.out @@ -15,14 +15,18 @@ POSTHOOK: Output: default@decimal_test POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, 
CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30,44 +34,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_test - Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cdecimal1 > 0) and (cdecimal1 < 12345.5678) and (cdecimal2 <> 0) and (cdecimal2 > 1000) and cdouble is not null) (type: boolean) - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: decimal(38,23)), (cdecimal1 * (cdecimal2 / 3.4)) (type: decimal(38,28)), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float), UDFToString(cdecimal2) (type: string), CAST( cdecimal1 AS TIMESTAMP) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(25,14)), _col1 (type: decimal(26,14)), _col2 (type: decimal(38,23)), _col3 (type: decimal(38,28)), _col4 (type: decimal(12,10)), _col5 (type: int), _col6 (type: smallint), _col7 (type: tinyint), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: double), _col11 (type: float), _col12 (type: string), _col13 (type: timestamp) - sort order: 
++++++++++++++ - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,23)), KEY.reducesinkkey3 (type: decimal(38,28)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 diff --git ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out index c69b7af..62ac025 100644 --- ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out @@ -72,12 +72,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -114,11 +118,23 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[1:long]) + vectorized: true predicate: dec is not null (type: boolean) Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(4,2)] + vectorized: true Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -126,16 +142,36 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(6,2)) 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + vectorized: true outputColumnNames: _col0, _col1 Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out index bce8f4f..321b9cb 100644 --- ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out +++ ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out @@ -14,7 +14,7 @@ POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdecimal1 ,Round(cdecimal1, 2) @@ -53,7 +53,7 @@ and sin(cdecimal1) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. 
-explain +explain vectorization select cdecimal1 ,Round(cdecimal1, 2) @@ -90,6 +90,10 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cdecimal1) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -97,31 +101,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_test - Statistics: Num rows: 12288 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean) - Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) (type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 - Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select cdecimal1 diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index c5f9e4d..a3fda2e 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -545,10 +545,14 @@ NULL NULL 123456789.0123456789 15241578753238836.75019051998750190521 1234567890.1234560000 NULL 1234567890.1234567890 NULL -PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM 
DECIMAL_PRECISION +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -556,43 +560,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_precision - Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: dec (type: decimal(20,10)) - outputColumnNames: dec - Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(dec), sum(dec) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_decimal_round.q.out ql/src/test/results/clientpositive/vector_decimal_round.q.out index 7dc885e..a33369e 100644 --- ql/src/test/results/clientpositive/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/vector_decimal_round.q.out @@ -30,12 +30,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -56,6 +60,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -84,12 +96,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_txt #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_txt order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -110,6 +126,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) @@ -166,12 +190,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -192,6 +220,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -220,12 +256,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_rc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_rc order by round(dec, -1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is 
a root stage Stage-0 depends on stages: Stage-1 @@ -246,6 +286,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) @@ -302,12 +350,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by dec PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by dec POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -322,13 +374,37 @@ STAGE PLANS: Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)], FuncRoundWithNumDigitsDecimalToDecimal[1:decimal(11,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0)) @@ -357,12 +433,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_orc #### A masked pattern was here #### 101 100 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dec, round(dec, -1) from decimal_tbl_orc order by round(dec, -1) 
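-- Across this file the same ROUND query is explained over text, RCFile, and
-- ORC copies of the table: the text and RCFile plans above report Map
-- Vectorization enabled: false (enabledConditionsNotMet on
-- hive.vectorized.use.vector.serde.deserialize and
-- hive.vectorized.use.row.serde.deserialize respectively), while the ORC
-- plans vectorize via the vectorized input format. A sketch, assuming those
-- properties enable vectorized deserialization for non-ORC inputs:
SET hive.vectorized.use.vector.serde.deserialize=true;
EXPLAIN VECTORIZATION DETAIL select dec, round(dec, -1) from decimal_tbl_txt order by dec;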
POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -377,13 +457,37 @@ STAGE PLANS: Select Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:decimal(10,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) diff --git ql/src/test/results/clientpositive/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/vector_decimal_round_2.q.out index 4924bff..5f045b4 100644 --- ql/src/test/results/clientpositive/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_round_2.q.out @@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), @@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0) -- FROM decimal_tbl_1_orc ORDER BY dec; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3), round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4), round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8) FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,13 +76,37 @@ STAGE PLANS: Select Operator expressions: round(dec) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) 
(type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundDecimalToDecimal[1:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[2:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[4:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[5:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[6:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[7:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[8:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[9:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[10:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[11:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[12:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[13:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)) @@ -144,7 +172,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### 125.315000000000000000 -125.315000000000000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -154,7 
+182,7 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos) as p, round(pos, 0), round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4), @@ -164,6 +192,10 @@ SELECT round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4) FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -178,13 +210,37 @@ STAGE PLANS: Select Operator expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundDecimalToDecimal[2:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[4:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[5:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[6:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[7:decimal(25,4)], FuncRoundWithNumDigitsDecimalToDecimal[8:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[9:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[10:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[11:decimal(21,0)], FuncRoundDecimalToDecimal[12:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[13:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[14:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[15:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[16:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[17:decimal(25,4)], FuncRoundWithNumDigitsDecimalToDecimal[18:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[19:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[20:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[21:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0)) @@ -255,7 +311,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### 3.141592653589793000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -276,7 +332,7 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(dec, -15) as d, round(dec, -16), round(dec, -13), round(dec, -14), @@ -297,6 +353,10 @@ SELECT round(dec, 15), round(dec, 16) FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -311,13 +371,37 @@ STAGE PLANS: Select Operator expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), 
round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15) (type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0)) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col30, _col31, _col32, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal[1:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[2:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[4:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[5:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[6:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[7:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[8:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[9:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[10:decimal(22,1)], FuncRoundWithNumDigitsDecimalToDecimal[11:decimal(23,2)], FuncRoundWithNumDigitsDecimalToDecimal[12:decimal(24,3)], FuncRoundWithNumDigitsDecimalToDecimal[13:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[14:decimal(25,4)], FuncRoundWithNumDigitsDecimalToDecimal[15:decimal(26,5)], FuncRoundWithNumDigitsDecimalToDecimal[16:decimal(27,6)], FuncRoundWithNumDigitsDecimalToDecimal[17:decimal(28,7)], FuncRoundWithNumDigitsDecimalToDecimal[18:decimal(29,8)], FuncRoundWithNumDigitsDecimalToDecimal[19:decimal(30,9)], FuncRoundWithNumDigitsDecimalToDecimal[20:decimal(31,10)], FuncRoundWithNumDigitsDecimalToDecimal[21:decimal(32,11)], FuncRoundWithNumDigitsDecimalToDecimal[22:decimal(33,12)], FuncRoundWithNumDigitsDecimalToDecimal[23:decimal(34,13)], FuncRoundWithNumDigitsDecimalToDecimal[24:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[25:decimal(35,14)], FuncRoundWithNumDigitsDecimalToDecimal[26:decimal(36,15)], FuncRoundWithNumDigitsDecimalToDecimal[27:decimal(37,16)], FuncRoundWithNumDigitsDecimalToDecimal[28:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[29:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[30:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[31:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[32:decimal(21,0)], FuncRoundWithNumDigitsDecimalToDecimal[33:decimal(21,0)] + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value
expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col30 (type: decimal(35,14)), _col31 (type: decimal(36,15)), _col32 (type: decimal(37,16)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)), VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16)) @@ -411,14 +495,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_4_orc #### A masked pattern was here #### 1809242.315111134400000000 -1809242.315111134400000000 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9) FROM decimal_tbl_4_orc ORDER BY p POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends
on stages: Stage-1 @@ -433,13 +521,37 @@ STAGE PLANS: Select Operator expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal[2:decimal(30,9)], FuncRoundWithNumDigitsDecimalToDecimal[3:decimal(30,9)] + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out index 1fbc05a..8585043 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@decimal_udf POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key + key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- addition -EXPLAIN SELECT key + key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key + key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -61,28 +65,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key + key) (type: decimal(21,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key + key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -130,10 +124,14 @@ NULL 2.0000000000 -2469135780.2469135780 2469135780.2469135600 -PREHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key + value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key + value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -141,28 +139,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key + CAST( value AS decimal(10,0))) (type: decimal(21,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key + value FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -210,10 +198,14 @@ NULL 2.0000000000 -2469135780.1234567890 2469135780.1234567800 -PREHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key + (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key + (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -221,28 +213,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) + (UDFToDouble(value) / 2.0)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key + (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -290,10 +272,14 @@ NULL 1.5 -1.8518518351234567E9 1.8518518351234567E9 -PREHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key + '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key + '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -301,28 +287,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) + 1.0) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key + '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -371,11 +347,15 @@ NULL -1.2345678891234567E9 1.2345678911234567E9 PREHOOK: query: -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key - key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- substraction -EXPLAIN SELECT key - key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key - key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -383,28 +363,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key - key) (type: decimal(21,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key - key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -452,10 +422,14 @@ NULL 0.0000000000 0.0000000000 0.0000000000 -PREHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key - value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key - value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -463,28 +437,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key - CAST( value AS decimal(10,0))) (type: decimal(21,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key - value FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -532,10 +496,14 @@ NULL 0.0000000000 -0.1234567890 0.1234567800 -PREHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key - (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key - (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -543,28 +511,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) - (UDFToDouble(value) / 2.0)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: 
NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key - (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -612,10 +570,14 @@ NULL 0.5 -6.172839451234567E8 6.172839451234567E8 -PREHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key - '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key - '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -623,28 +585,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) - 1.0) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key - '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -693,11 +645,15 @@ NULL -1.2345678911234567E9 1.2345678891234567E9 PREHOOK: query: -- multiplication -EXPLAIN SELECT key * key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key * key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- multiplication -EXPLAIN SELECT key * key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT key * key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -705,28 +661,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key * key) (type: decimal(38,20)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key * key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -774,10 +720,14 @@ NULL 1.00000000000000000000 NULL NULL -PREHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key, value FROM DECIMAL_UDF where key * value > 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key, value FROM DECIMAL_UDF where key * value > 0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key, value FROM DECIMAL_UDF where key * value > 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -785,31 +735,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)), value (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key, value FROM DECIMAL_UDF where key * value > 0 PREHOOK: type: QUERY @@ -842,10 +779,14 @@ POSTHOOK: Input: default@decimal_udf 1.0000000000 1 -1234567890.1234567890 -1234567890 1234567890.1234567800 1234567890 -PREHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key * value FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key * value FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -853,28 +794,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator 
- expressions: (key * CAST( value AS decimal(10,0))) (type: decimal(31,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key * value FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -922,10 +853,14 @@ NULL 1.0000000000 1524157875171467887.5019052100 1524157875171467876.3907942000 -PREHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key * (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key * (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -933,28 +868,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) * (UDFToDouble(value) / 2.0)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key * (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1002,10 +927,14 @@ NULL 0.5 7.6207893758573389E17 7.6207893758573389E17 -PREHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key * '2.0' FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key * '2.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1013,28 +942,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data 
size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) * 2.0) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key * '2.0' FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1083,11 +1002,15 @@ NULL -2.4691357802469134E9 2.4691357802469134E9 PREHOOK: query: -- division -EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 +EXPLAIN VECTORIZATION SELECT key / 0 FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- division -EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 +EXPLAIN VECTORIZATION SELECT key / 0 FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1095,31 +1018,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key / 0) (type: decimal(28,18)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 1 - Processor Tree: - ListSink PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY @@ -1130,10 +1040,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / NULL FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / NULL FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ 
-1141,30 +1055,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToDouble(key) / null) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: org.apache.hadoop.hive.ql.metadata.HiveException: Could not instantiate DoubleColDivideDoubleScalar with arguments arguments: [0, ConstantVectorExpression[1:double], 2], argument classes: [Integer, ConstantVectorExpression, Integer], exception: java.lang.IllegalArgumentException +#### A masked pattern was here #### + + vectorized: false Stage: Stage-0 - Fetch Operator limit: 1 - Processor Tree: - ListSink PREHOOK: query: SELECT key / NULL FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY @@ -1175,10 +1076,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1186,31 +1091,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key / key) (type: decimal(38,24)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator 
limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 PREHOOK: type: QUERY @@ -1254,10 +1146,14 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000000 1.000000000000000000000000 1.000000000000000000000000 -PREHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1265,31 +1161,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY @@ -1323,10 +1206,14 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000 1.000000000100000000000 1.000000000099999992710 -PREHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1334,31 +1221,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
(UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY @@ -1392,10 +1266,14 @@ POSTHOOK: Input: default@decimal_udf 2.0 2.0000000002 2.0000000002 -PREHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF +PREHOOK: query: EXPLAIN VECTORIZATION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT 1 + (key / '2.0') FROM DECIMAL_UDF +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT 1 + (key / '2.0') FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1403,28 +1281,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (1.0 + (UDFToDouble(key) / 2.0)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT 1 + (key / '2.0') FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1473,11 +1341,15 @@ NULL -6.172839440617284E8 6.172839460617284E8 PREHOOK: query: -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT abs(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- abs -EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT abs(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1485,28 +1357,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: abs(key) (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT abs(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1555,11 +1417,15 @@ NULL 1234567890.1234567890 1234567890.1234567800 PREHOOK: query: -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +EXPLAIN VECTORIZATION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value PREHOOK: type: QUERY POSTHOOK: query: -- avg -EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +EXPLAIN VECTORIZATION SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1568,72 +1434,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: int), key (type: decimal(20,10)) - outputColumnNames: value, key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(key), count(key), avg(key) - keys: value (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,23)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(38,23)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,23)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value PREHOOK: type: QUERY @@ -1661,11 +1488,15 @@ POSTHOOK: Input: default@decimal_udf 4400 -4400.00000000000000000000000 -4400.00000000000000 -4400.0000000000 1234567890 1234567890.12345678000000000000000 1234567890.12345678000000 1234567890.1234567800 PREHOOK: query: -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- negative -EXPLAIN SELECT -key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT -key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1673,28 +1504,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (- key) (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1743,27 +1564,21 @@ NULL 1234567890.1234567890 -1234567890.1234567800 PREHOOK: query: -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- positive -EXPLAIN SELECT +key FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT +key FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - ListSink PREHOOK: query: SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1824,28 +1639,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ceil(key) (type: decimal(11,0)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT CEIL(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1894,11 +1691,15 @@ NULL -1234567890 1234567891 PREHOOK: query: -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT FLOOR(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- floor -EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT FLOOR(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1906,28 +1707,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: floor(key) (type: decimal(11,0)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT FLOOR(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1976,11 +1767,15 @@ NULL -1234567891 1234567890 PREHOOK: query: -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT ROUND(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- round -EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT ROUND(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1988,28 +1783,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: round(key, 2) (type: decimal(13,2)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT ROUND(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2058,11 +1843,15 @@ NULL -1234567890.12 1234567890.12 PREHOOK: query: -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- power -EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2070,28 +1859,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: power(key, 2) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + 
Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2140,11 +1919,15 @@ NULL 1.52415787532388352E18 1.52415787532388352E18 PREHOOK: query: -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- modulo -EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2152,28 +1935,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ((key + 1) % (key / 2)) (type: decimal(28,18)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2222,11 +1995,15 @@ NULL -617283944.061728394500000000 1.000000000000000000 PREHOOK: query: -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY POSTHOOK: query: -- stddev, var -EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2234,47 +2011,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: int), key (type: decimal(20,10)) - outputColumnNames: value, key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev(key), variance(key) - keys: value (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 38 Data 
size: 4296 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: stddev(VALUE._col0), variance(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY @@ -2302,11 +2054,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY POSTHOOK: query: -- stddev_samp, var_samp -EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +EXPLAIN VECTORIZATION SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2314,47 +2070,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: int), key (type: decimal(20,10)) - outputColumnNames: value, key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(key), var_samp(key) - keys: value (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - 
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY @@ -2382,11 +2113,15 @@ POSTHOOK: Input: default@decimal_udf 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- histogram -EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2394,42 +2129,19 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: histogram_numeric(_col0, 3) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array) - Reduce Operator Tree: - Group By Operator - aggregations: histogram_numeric(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF histogram_numeric not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 
-1 - Processor Tree: - ListSink PREHOOK: query: SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2441,11 +2153,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### [{"x":-1.2345678901234567E9,"y":1.0},{"x":-144.50057142857142,"y":35.0},{"x":1.2345678901234567E9,"y":1.0}] PREHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MIN(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- min -EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MIN(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2453,43 +2169,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT MIN(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2501,11 +2196,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -1234567890.1234567890 PREHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MAX(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- max -EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT MAX(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2513,43 +2212,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key 
(type: decimal(20,10)) - outputColumnNames: key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT MAX(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2561,11 +2239,15 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 1234567890.1234567800 PREHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT COUNT(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: -- count -EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF +EXPLAIN VECTORIZATION SELECT COUNT(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2573,43 +2255,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT COUNT(key) FROM DECIMAL_UDF PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 4e24fa6..43e40aa 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,20 +72,45 @@ STAGE PLANS: alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterDecimalColEqualDecimalScalar[-1:boolean] + vectorized: true predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[2:double], ConstantVectorExpression[3:double], ConstantVectorExpression[4:double], ConstantVectorExpression[5:double], ConstantVectorExpression[6:double], ConstantVectorExpression[7:double], ConstantVectorExpression[8:double] + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -100,20 +129,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), log(key), log(key, key), log(key, value), log(value, key), log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,20 +159,45 @@ STAGE PLANS: alias: decimal_udf2 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterDecimalColEqualDecimalScalar[-1:boolean] + vectorized: true predicate: (key = 10) (type: boolean) Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[2:double], ConstantVectorExpression[3:double], ConstantVectorExpression[4:double], ConstantVectorExpression[5:double], FuncLogWithBaseLongToDouble[6:double], VectorUDFAdaptor[7:Double], ConstantVectorExpression[8:double], ConstantVectorExpression[9:double] + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index dba2e6e..7a46125 100644 
--- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select distinct s, t from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select distinct s, t from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,8 +129,21 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: t, s + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -135,8 +152,26 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) diff --git ql/src/test/results/clientpositive/vector_elt.q.out ql/src/test/results/clientpositive/vector_elt.q.out index 08ca167..684e1d7 100644 --- ql/src/test/results/clientpositive/vector_elt.q.out +++ ql/src/test/results/clientpositive/vector_elt.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +PREHOOK: query: EXPLAIN 
VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc WHERE ctinyint > 0 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -13,34 +17,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ctinyint > 0) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint) FROM alltypesorc @@ -64,7 +52,7 @@ POSTHOOK: Input: default@alltypesorc 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 2 cvLH6Eat2yFsyy7p 528534767 528534767 1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -77,7 +65,7 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT elt(2, 'abc', 'defg'), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'), elt('1', 'abc', 'defg'), @@ -90,6 +78,10 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM alltypesorc LIMIT 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index 8f694da..8a723db 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- HIVE- 
-explain +explain vectorization detail select count (distinct cint) from alltypesorc where cstring1 PREHOOK: type: QUERY POSTHOOK: query: -- HIVE- -explain +explain vectorization detail select count (distinct cint) from alltypesorc where cstring1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -18,14 +22,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsTrue[-1:boolean](CastLongToBooleanViaLongToLong[13:long](StringLength[12:Long])) + vectorized: true predicate: cstring1 (type: string) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -33,8 +57,26 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) @@ -64,12 +106,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 6041 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count (distinct cint) from alltypesorc where cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count (distinct cint) from alltypesorc where cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -82,10 
+128,24 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsTrue[-1:boolean](CastLongToBooleanViaLongToLong[12:long]) + vectorized: true predicate: cint (type: int) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -93,8 +153,26 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) @@ -124,12 +202,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 6082 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count (distinct cint) from alltypesorc where cfloat PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count (distinct cint) from alltypesorc where cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -142,14 +224,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsTrue[-1:boolean](CastDoubleToBooleanViaDoubleToLong[12:long]) + vectorized: true predicate: cfloat (type: float) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -157,8 +259,26 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) @@ -188,12 +308,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 3022 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count (distinct cint) from alltypesorc where ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count (distinct cint) from alltypesorc where ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -206,14 +330,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsTrue[-1:boolean](CastTimestampToBoolean[12:long]) + vectorized: true predicate: ctimestamp1 (type: timestamp) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -221,8 +365,26 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 
(type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out index 8041511..802d071 100644 --- ql/src/test/results/clientpositive/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -46,13 +50,37 @@ STAGE PLANS: Select Operator expressions: substr(key, 1, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringSubstrColStartLen[2:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + 
enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -75,6 +103,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out index 63fe8f3..3e4e3e0 100644 --- ql/src/test/results/clientpositive/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -46,13 +50,37 @@ STAGE PLANS: Select Operator expressions: substr(value, 5, 1) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringSubstrColStartLen[2:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -75,6 +103,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: 
string) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index 38eeeef..e446a70 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s, t, max(b) from vectortab2korc group by s, t PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s, t, max(b) from vectortab2korc group by s, t POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,9 +129,23 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), s (type: string), b (type: bigint) outputColumnNames: t, s, b + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[8:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: t (type: tinyint), s (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -136,9 +154,27 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 3468657..010506d 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -1,6 +1,6 @@ Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. -explain +explain vectorization detail select * from src where not key in @@ -8,13 +8,17 @@ where not key in order by key PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. -explain +explain vectorization detail select * from src where not key in (select key from src) order by key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-8 depends on stages: Stage-4 @@ -42,6 +46,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -127,8 +139,16 @@ STAGE PLANS: sort order: + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 9e0abd4..98e4925 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -211,7 +211,7 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: 
store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ss_ticket_number from @@ -219,7 +219,7 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ss_ticket_number from @@ -227,6 +227,10 @@ from group by ss_ticket_number limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -241,8 +245,21 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[9:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -251,9 +268,27 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -315,7 +350,7 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select min(ss_ticket_number) m from @@ -327,7 +362,7 @@ from group by ss_ticket_number order by m PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select min(ss_ticket_number) m from @@ -339,6 +374,10 @@ from group by ss_ticket_number order by m POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -354,8 +393,21 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[9:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 @@ -364,8 +416,26 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -397,6 +467,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -524,7 +602,7 @@ POSTHOOK: Input: default@store_sales 80 81 82 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -536,7 +614,7 @@ from group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ss_ticket_number, sum(ss_item_sk), sum(q) from @@ -548,6 +626,10 @@ from group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -563,9 +645,23 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[9:int], IdentityExpression[2:int], IdentityExpression[10:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[10:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int], IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -574,9 +670,27 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -610,6 +724,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) @@ -737,7 +859,7 @@ POSTHOOK: Input: default@store_sales 80 151471 704 81 105109 429 82 55611 254 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ss_ticket_number, ss_item_sk, sum(q) from @@ -749,7 +871,7 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ss_ticket_number, ss_item_sk, sum(q) from @@ -761,6 +883,10 @@ from group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -776,9 +902,23 @@ STAGE PLANS: Select Operator expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int) outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: 
IdentityExpression[9:int], IdentityExpression[2:int], IdentityExpression[10:int] + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ss_quantity) + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[10:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[9:int], IdentityExpression[2:int] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -787,9 +927,27 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -823,6 +981,14 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index 4207c19..09bc663 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -127,16 +127,20 @@ POSTHOOK: Lineage: store.s_street_type SIMPLE [(store_txt)store_txt.FieldSchema( POSTHOOK: Lineage: store.s_suite_number SIMPLE [(store_txt)store_txt.FieldSchema(name:s_suite_number, type:string, comment:null), ] POSTHOOK: Lineage: store.s_tax_precentage SIMPLE [(store_txt)store_txt.FieldSchema(name:s_tax_precentage, type:decimal(5,2), comment:null), ] POSTHOOK: Lineage: store.s_zip SIMPLE [(store_txt)store_txt.FieldSchema(name:s_zip, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s_store_id from store group by s_store_id with 
rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s_store_id from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,6 +166,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -202,16 +216,20 @@ AAAAAAAAEAAAAAAA AAAAAAAAHAAAAAAA AAAAAAAAIAAAAAAA AAAAAAAAKAAAAAAA -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s_store_id, GROUPING__ID from store group by s_store_id with rollup PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s_store_id, GROUPING__ID from store group by s_store_id with rollup POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -237,6 +255,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: GROUPBY operator: Grouping sets not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) diff --git ql/src/test/results/clientpositive/vector_if_expr.q.out ql/src/test/results/clientpositive/vector_if_expr.q.out index 0a82be9..b31a7be 100644 --- ql/src/test/results/clientpositive/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/vector_if_expr.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, IF (cboolean1, 'first', 'second') FROM alltypesorc WHERE cboolean1 IS NOT NULL AND cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -16,18 +20,48 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE 
Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsTrue[-1:boolean] SelectColumnIsNotNull[-1:boolean]) + vectorized: true predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IfExprStringScalarStringScalar[12:String] + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 697d422..f756c30 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -181,16 +181,20 @@ POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographic POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] Warning: Map Join MAPJOIN[15][bigTable=store_sales] in task 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(1) from customer_demographics,store_sales where 
((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -225,25 +229,67 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Not empty key IS false + vectorized: true outputColumnNames: _col0, _col2, _col16 Statistics: Num rows: 200000 Data size: 92055200 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean] FilterStringGroupColEqualStringScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean] FilterStringGroupColEqualStringScalar[-1:boolean])) + vectorized: true predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 100000 Data size: 46027600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression[3:long]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git 
ql/src/test/results/clientpositive/vector_inner_join.q.out ql/src/test/results/clientpositive/vector_inner_join.q.out index e3e0cf4..c7827a4 100644 --- ql/src/test/results/clientpositive/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/vector_inner_join.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -48,7 +52,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:t1 @@ -69,45 +72,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -120,12 +98,16 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -136,7 +118,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:t1 @@ -162,41 +143,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY @@ -245,12 +205,16 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -261,7 +225,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:t1 @@ -282,45 +245,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 
1 _col1 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -333,12 +271,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -349,7 +291,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:t1 @@ -370,41 +311,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - 
Processor Tree: - ListSink PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -417,12 +337,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -433,7 +357,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:t1 @@ -454,45 +377,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -505,12 +403,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -521,7 +423,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:t1 @@ -542,45 +443,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -593,12 +469,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -609,7 +489,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:t1 @@ -630,45 +509,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator 
- condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -681,12 +535,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -697,7 +555,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:t1 @@ -718,45 +575,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY @@ -769,12 +601,16 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization summary select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -785,7 +621,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:t1 @@ -806,45 +641,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_interval_1.q.out ql/src/test/results/clientpositive/vector_interval_1.q.out index 373a6de..f124978 100644 --- ql/src/test/results/clientpositive/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/vector_interval_1.q.out @@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] 
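The vector_interval_1.q.out hunks that follow show the same three layers of annotation added throughout these baselines: a plan-level PLAN VECTORIZATION block, per-task Map Vectorization / Reduce Vectorization blocks, and, at the DETAIL level, per-operator blocks such as Select Vectorization with selectExpressions. A minimal HiveQL sketch of a session that would produce output of this shape (the simplified select list is illustrative; the settings, table, and columns are the ones exercised by this test, and on the MR engine the reduce side is expected to report enabled: false per the "hive.execution.engine mr IN [tez, spark] IS false" condition):

    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;
    SET hive.execution.engine=mr;

    -- SUMMARY emits only the plan-level and per-task vectorization blocks,
    -- as in the vector_inner_join.q.out hunks above.
    EXPLAIN VECTORIZATION SUMMARY
    SELECT str1, CAST(str1 AS INTERVAL YEAR TO MONTH)
    FROM vector_interval_1 ORDER BY str1;

    -- DETAIL additionally emits the per-operator blocks (Select Vectorization,
    -- Reduce Sink Vectorization, ...) seen in the hunks below.
    EXPLAIN VECTORIZATION DETAIL
    SELECT str1, CAST(str1 AS INTERVAL YEAR TO MONTH)
    FROM vector_interval_1 ORDER BY str1;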
POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] PREHOOK: query: -- constants/cast from string -explain +explain vectorization detail select str1, interval '1-2' year to month, interval_year_month(str1), @@ -47,13 +47,17 @@ select from vector_interval_1 order by str1 PREHOOK: type: QUERY POSTHOOK: query: -- constants/cast from string -explain +explain vectorization detail select str1, interval '1-2' year to month, interval_year_month(str1), interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,13 +72,37 @@ STAGE PLANS: Select Operator expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) outputColumnNames: _col0, _col2, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:string], CastStringToIntervalYearMonth[4:interval_year_month], CastStringToIntervalDayTime[5:interval_day_time] + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time) @@ -113,7 +141,7 @@ POSTHOOK: Input: default@vector_interval_1 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: -- interval arithmetic -explain +explain vectorization detail select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -125,7 +153,7 @@ select from vector_interval_1 order by dt PREHOOK: type: QUERY POSTHOOK: query: -- interval arithmetic -explain +explain vectorization detail select dt, interval '1-2' year to month + interval '1-2' year to month, @@ -136,6 +164,10 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +PLAN 
VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -150,13 +182,37 @@ STAGE PLANS:
             Select Operator
               expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month)
               outputColumnNames: _col0, _col2, _col3, _col5, _col6
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[1:date], IntervalYearMonthColAddIntervalYearMonthColumn[6:long](CastStringToIntervalYearMonth[4:interval_year_month] CastStringToIntervalYearMonth[5:interval_year_month]), IntervalYearMonthScalarAddIntervalYearMonthColumn[5:long](CastStringToIntervalYearMonth[4:interval_year_month]), IntervalYearMonthColSubtractIntervalYearMonthColumn[8:long](CastStringToIntervalYearMonth[4:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthScalarSubtractIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[4:interval_year_month])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month)
@@ -202,7 +258,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL 2-4 NULL NULL 0-0 NULL NULL
 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -213,7 +269,7 @@ select
   interval '1 2:3:4' day to second - interval_day_time(str2)
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -224,6 +280,10 @@ select
   interval '1 2:3:4' day to second - interval_day_time(str2)
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -238,13 +298,37 @@ STAGE PLANS:
             Select Operator
               expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time)
               outputColumnNames: _col0, _col2, _col3, _col5, _col6
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[1:date], IntervalDayTimeColAddIntervalDayTimeColumn[6:interval_day_time](CastStringToIntervalDayTime[4:interval_day_time] CastStringToIntervalDayTime[5:interval_day_time]), IntervalDayTimeScalarAddIntervalDayTimeColumn[5:timestamp](CastStringToIntervalDayTime[4:interval_day_time]), IntervalDayTimeColSubtractIntervalDayTimeColumn[8:interval_day_time](CastStringToIntervalDayTime[4:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeScalarSubtractIntervalDayTimeColumn[7:timestamp](CastStringToIntervalDayTime[4:interval_day_time])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
@@ -291,7 +375,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL
 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-interval arithmetic
-explain
+explain vectorization detail
 select
   dt,
   dt + interval '1-2' year to month,
@@ -309,7 +393,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-interval arithmetic
-explain
+explain vectorization detail
 select
   dt,
   dt + interval '1-2' year to month,
@@ -326,6 +410,10 @@ select
   dt - interval_day_time(str2)
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -340,13 +428,37 @@ STAGE PLANS:
             Select Operator
               expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[1:date], DateColAddIntervalYearMonthScalar[4:long], DateColAddIntervalYearMonthColumn[6:long](CastStringToIntervalYearMonth[5:interval_year_month]), IntervalYearMonthScalarAddDateColumn[7:long], IntervalYearMonthColAddDateColumn[8:long](CastStringToIntervalYearMonth[5:interval_year_month]), DateColSubtractIntervalYearMonthScalar[9:long], DateColSubtractIntervalYearMonthColumn[10:long](CastStringToIntervalYearMonth[5:interval_year_month]), DateColAddIntervalDayTimeScalar[11:timestamp], DateColAddIntervalDayTimeColumn[13:timestamp](CastStringToIntervalDayTime[12:interval_day_time]), IntervalDayTimeScalarAddDateColumn[14:timestamp], IntervalDayTimeColAddDateColumn[15:interval_day_time](CastStringToIntervalDayTime[12:interval_day_time]), DateColSubtractIntervalDayTimeScalar[16:timestamp], DateColSubtractIntervalDayTimeColumn[17:timestamp](CastStringToIntervalDayTime[12:interval_day_time])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
@@ -405,7 +517,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56
 PREHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization detail
 select
   ts,
   ts + interval '1-2' year to month,
@@ -423,7 +535,7 @@ select
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization detail
 select
   ts,
   ts + interval '1-2' year to month,
@@ -440,6 +552,10 @@ select
   ts - interval_day_time(str2)
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -454,13 +570,37 @@ STAGE PLANS:
             Select Operator
               expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:timestamp], TimestampColAddIntervalYearMonthScalar[4:timestamp], TimestampColAddIntervalYearMonthColumn[6:timestamp](CastStringToIntervalYearMonth[5:interval_year_month]), IntervalYearMonthScalarAddTimestampColumn[7:timestamp], IntervalYearMonthColAddTimestampColumn[8:timestamp](CastStringToIntervalYearMonth[5:interval_year_month]), TimestampColSubtractIntervalYearMonthScalar[9:timestamp], TimestampColSubtractIntervalYearMonthColumn[10:timestamp](CastStringToIntervalYearMonth[5:interval_year_month]), TimestampColAddIntervalDayTimeScalar[11:timestamp], TimestampColAddIntervalDayTimeColumn[13:timestamp](CastStringToIntervalDayTime[12:interval_day_time]), IntervalDayTimeScalarAddTimestampColumn[14:timestamp], IntervalDayTimeColAddTimestampColumn[15:timestamp](CastStringToIntervalDayTime[12:interval_day_time]), TimestampColSubtractIntervalDayTimeScalar[16:timestamp], TimestampColSubtractIntervalDayTimeColumn[17:timestamp](CastStringToIntervalDayTime[12:interval_day_time])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: timestamp)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
@@ -519,7 +659,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59
 PREHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization detail
 select
   ts,
   ts - ts,
@@ -528,7 +668,7 @@ select
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization detail
 select
   ts,
   ts - ts,
@@ -536,6 +676,10 @@ select
   ts - timestamp '2001-01-01 01:02:03'
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -550,13 +694,37 @@ STAGE PLANS:
             Select Operator
               expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time)
               outputColumnNames: _col0, _col1, _col2, _col3
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[0:timestamp], TimestampColSubtractTimestampColumn[4:interval_day_time], TimestampScalarSubtractTimestampColumn[5:timestamp], TimestampColSubtractTimestampScalar[6:interval_day_time]
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: timestamp)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
@@ -597,7 +765,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL
 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-date arithmetic
-explain
+explain vectorization detail
 select
   dt,
   dt - dt,
@@ -606,7 +774,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-date arithmetic
-explain
+explain vectorization detail
 select
   dt,
   dt - dt,
@@ -614,6 +782,10 @@ select
   dt - date '2001-01-01'
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -628,13 +800,37 @@ STAGE PLANS:
             Select Operator
               expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time)
               outputColumnNames: _col0, _col1, _col2, _col3
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[1:date], DateColSubtractDateColumn[4:timestamp], DateScalarSubtractDateColumn[5:timestamp], DateColSubtractDateScalar[6:timestamp]
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
@@ -675,7 +871,7 @@ POSTHOOK: Input: default@vector_interval_1
 NULL NULL NULL NULL
 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
 PREHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization detail
 select
   dt,
   ts - dt,
@@ -687,7 +883,7 @@ select
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
 POSTHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization detail
 select
   dt,
   ts - dt,
@@ -698,6 +894,10 @@ select
   date '2001-01-01' - ts
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -712,13 +912,37 @@ STAGE PLANS:
             Select Operator
               expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[1:date], TimestampColSubtractDateColumn[4:interval_day_time], TimestampScalarSubtractDateColumn[5:interval_day_time], TimestampColSubtractDateScalar[6:interval_day_time], DateColSubtractTimestampColumn[7:interval_day_time], DateColSubtractTimestampScalar[8:interval_day_time], DateScalarSubtractTimestampColumn[9:interval_day_time]
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: date)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
diff --git ql/src/test/results/clientpositive/vector_interval_2.q.out ql/src/test/results/clientpositive/vector_interval_2.q.out
index 7f40f10..44c03c1 100644
--- ql/src/test/results/clientpositive/vector_interval_2.q.out
+++ ql/src/test/results/clientpositive/vector_interval_2.q.out
@@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
 PREHOOK: query: -- interval comparisons in select clause
-explain
+explain vectorization detail
 select
   str1,
   -- Should all be true
@@ -78,7 +78,7 @@ from vector_interval_2 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval comparisons in select clause
-explain
+explain vectorization detail
 select
   str1,
   -- Should all be true
@@ -110,6 +110,10 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -124,13 +128,37 @@ STAGE PLANS:
             Select Operator
               expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[2:string], LongColEqualLongColumn[8:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessEqualLongColumn[9:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessEqualLongColumn[10:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessLongColumn[11:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterEqualLongColumn[12:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterEqualLongColumn[13:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterLongColumn[14:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColNotEqualLongColumn[15:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthColEqualIntervalYearMonthScalar[16:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessEqualIntervalYearMonthScalar[17:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessEqualIntervalYearMonthScalar[18:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessIntervalYearMonthScalar[19:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterEqualIntervalYearMonthScalar[20:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterEqualIntervalYearMonthScalar[21:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterIntervalYearMonthScalar[22:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColNotEqualIntervalYearMonthScalar[23:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarEqualIntervalYearMonthColumn[24:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessEqualIntervalYearMonthColumn[25:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessEqualIntervalYearMonthColumn[26:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessIntervalYearMonthColumn[27:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[28:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[29:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterIntervalYearMonthColumn[30:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarNotEqualIntervalYearMonthColumn[31:long](CastStringToIntervalYearMonth[6:interval_year_month])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
@@ -218,7 +246,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1-2 true true true true true true true true true true true true true true true true true true true true true true true true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select
   str1,
   -- Should all be false
@@ -244,7 +272,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select
   str1,
   -- Should all be false
@@ -270,6 +298,10 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -284,13 +316,37 @@ STAGE PLANS:
             Select Operator
               expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean)
               outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[2:string], LongColNotEqualLongColumn[8:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthColLessEqualIntervalYearMonthScalar[9:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColLessIntervalYearMonthScalar[10:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarNotEqualIntervalYearMonthColumn[11:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[12:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarGreaterIntervalYearMonthColumn[13:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessEqualIntervalYearMonthColumn[14:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthScalarLessIntervalYearMonthColumn[15:long](CastStringToIntervalYearMonth[6:interval_year_month]), LongColGreaterEqualLongColumn[16:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColGreaterLongColumn[17:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessEqualLongColumn[18:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), LongColLessLongColumn[19:long](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]), IntervalYearMonthColNotEqualIntervalYearMonthScalar[20:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterEqualIntervalYearMonthScalar[21:long](CastStringToIntervalYearMonth[6:interval_year_month]), IntervalYearMonthColGreaterIntervalYearMonthScalar[22:long](CastStringToIntervalYearMonth[6:interval_year_month])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
@@ -366,7 +422,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1-2 false false false false false false false false false false false false false false false false false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select
   str3,
   -- Should all be true
@@ -398,7 +454,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select
   str3,
   -- Should all be true
@@ -430,6 +486,10 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -444,13 +504,37 @@ STAGE PLANS:
             Select Operator
               expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[4:string], IntervalDayTimeColEqualIntervalDayTimeColumn[8:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeColumn[9:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeColumn[10:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeColumn[11:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeColumn[12:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeColumn[13:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeColumn[14:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColNotEqualIntervalDayTimeColumn[15:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColEqualIntervalDayTimeScalar[16:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeScalar[17:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeScalar[18:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeScalar[19:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeScalar[20:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeScalar[21:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeScalar[22:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColNotEqualIntervalDayTimeScalar[23:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarEqualIntervalDayTimeColumn[24:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessEqualIntervalDayTimeColumn[25:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessEqualIntervalDayTimeColumn[26:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessIntervalDayTimeColumn[27:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[28:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[29:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterIntervalDayTimeColumn[30:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarNotEqualIntervalDayTimeColumn[31:long](CastStringToIntervalDayTime[6:interval_day_time])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean)
@@ -538,7 +622,7 @@ POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1 2:3:4 true true true true true true true true true true true true true true true true true true true true true true true true
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select
   str3,
   -- Should all be false
@@ -564,7 +648,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str3)
 from vector_interval_2 order by str3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select
   str3,
   -- Should all be false
@@ -590,6 +674,10 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str3)
 from vector_interval_2 order by str3
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -604,13 +692,37 @@ STAGE PLANS:
             Select Operator
               expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < 1 02:03:04.000000000) (type: boolean), (1 02:03:04.000000000 <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean)
               outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[4:string], IntervalDayTimeColNotEqualIntervalDayTimeColumn[8:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeScalar[9:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeScalar[10:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarNotEqualIntervalDayTimeColumn[11:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[12:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarGreaterIntervalDayTimeColumn[13:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessEqualIntervalDayTimeColumn[14:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeScalarLessIntervalDayTimeColumn[15:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeColumn[16:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeColumn[17:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessEqualIntervalDayTimeColumn[18:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColLessIntervalDayTimeColumn[19:long](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]), IntervalDayTimeColNotEqualIntervalDayTimeScalar[20:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterEqualIntervalDayTimeScalar[21:long](CastStringToIntervalDayTime[6:interval_day_time]), IntervalDayTimeColGreaterIntervalDayTimeScalar[22:long](CastStringToIntervalDayTime[6:interval_day_time])
+                  vectorized: true
               Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                    vectorized: true
                 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean)
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean)
@@ -687,7 +799,7 @@ POSTHOOK: Input: default@vector_interval_2
 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
 1 2:3:4 false false false false false false false false false false false false false false false false false false
 PREHOOK: query: -- interval expressions in predicates
-explain
+explain vectorization detail
 select ts from vector_interval_2
 where interval_year_month(str1) = interval_year_month(str1)
@@ -713,7 +825,7 @@ where
 order by ts
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval expressions in predicates
-explain
+explain vectorization detail
 select ts from vector_interval_2
 where interval_year_month(str1) = interval_year_month(str1)
@@ -738,6 +850,10 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -750,17 +866,47 @@ STAGE PLANS:
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColNotEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColLessEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColLessLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColGreaterEqualLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterLongColGreaterLongColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month] CastStringToIntervalYearMonth[7:interval_year_month]) FilterIntervalYearMonthColEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColNotEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColLessEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColLessIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColGreaterEqualIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthColGreaterIntervalYearMonthScalar[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarNotEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarLessEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarLessIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]) FilterIntervalYearMonthScalarGreaterIntervalYearMonthColumn[-1:boolean](CastStringToIntervalYearMonth[6:interval_year_month]))
+                  vectorized: true
               predicate: ((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean)
               Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ts (type: timestamp)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    nativeConditionsMet: Supported IS true
+                    selectExpressions: IdentityExpression[0:timestamp]
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: timestamp)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                      vectorized: true
                   Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -833,7 +979,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select ts from vector_interval_2
 where interval_day_time(str3) = interval_day_time(str3)
@@ -858,7 +1004,7 @@ where
 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select ts from vector_interval_2
 where interval_day_time(str3) = interval_day_time(str3)
@@ -883,6 +1029,10 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -895,17 +1045,47 @@ STAGE PLANS:
             alias: vector_interval_2
             Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  predicateExpression: FilterExprAndExpr[-1:boolean](FilterIntervalDayTimeColEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColNotEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColLessEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColLessIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColGreaterEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColGreaterIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time] CastStringToIntervalDayTime[7:interval_day_time]) FilterIntervalDayTimeColEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColNotEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColLessEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColLessIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColGreaterEqualIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeColGreaterIntervalDayTimeScalar[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarNotEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarLessEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarLessIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]) FilterIntervalDayTimeScalarGreaterIntervalDayTimeColumn[-1:boolean](CastStringToIntervalDayTime[6:interval_day_time]))
+                  vectorized: true
               predicate: ((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean)
               Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ts (type: timestamp)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    nativeConditionsMet: Supported IS true
+                    selectExpressions: IdentityExpression[0:timestamp]
+                    vectorized: true
                 Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: timestamp)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                      vectorized: true
                   Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE
       Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: timestamp)
@@ -978,7 +1158,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vector_interval_2
 #### A masked pattern was here ####
 2001-01-01 01:02:03
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select
ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -998,7 +1178,7 @@ where and dt != dt + interval '1-2' year to month order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where date '2002-03-01' = dt + interval_year_month(str1) @@ -1018,6 +1198,10 @@ where and dt != dt + interval '1-2' year to month order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1030,17 +1214,47 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDateScalarEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateScalarLessEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateScalarGreaterEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateColEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateColLessEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateColGreaterEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterLongColNotEqualLongColumn[-1:boolean](DateColAddIntervalYearMonthColumn[7:long](CastStringToIntervalYearMonth[6:interval_year_month])) FilterDateScalarEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateScalarLessEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateScalarGreaterEqualDateColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateColEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateColLessEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterDateColGreaterEqualDateScalar[-1:boolean](DateColAddIntervalYearMonthScalar[7:long]) FilterLongColNotEqualLongColumn[-1:boolean](DateColAddIntervalYearMonthScalar[7:long])) + vectorized: true predicate: ((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 = (dt + 1-2)) and (2002-03-01 <= (dt + 1-2)) and (2002-03-01 >= (dt + 1-2)) and ((dt + 1-2) = 2002-03-01) and ((dt + 1-2) <= 2002-03-01) and ((dt + 1-2) >= 2002-03-01) and (dt <> (dt + 1-2))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS 
true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) @@ -1103,7 +1317,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -1128,7 +1342,7 @@ where and ts > ts - interval '1' year order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month @@ -1153,6 +1367,10 @@ where and ts > ts - interval '1' year order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1165,17 +1383,47 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterTimestampScalarEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarGreaterEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarLessTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampScalarGreaterTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) 
FilterTimestampColNotEqualTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessTimestampScalar[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColLessTimestampColumn[-1:boolean](TimestampColAddIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampColumn[-1:boolean](TimestampColSubtractIntervalYearMonthScalar[6:timestamp]) FilterTimestampColGreaterTimestampColumn[-1:boolean](TimestampColSubtractIntervalYearMonthScalar[6:timestamp])) + vectorized: true predicate: ((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2)) and (2002-03-01 01:02:03.0 >= (ts + 1-2)) and (2002-04-01 01:02:03.0 <> (ts + 1-2)) and (2002-02-01 01:02:03.0 < (ts + 1-2)) and (2002-04-01 01:02:03.0 > (ts + 1-2)) and ((ts + 1-2) = 2002-03-01 01:02:03.0) and ((ts + 1-2) >= 2002-03-01 01:02:03.0) and ((ts + 1-2) <= 2002-03-01 01:02:03.0) and ((ts + 1-2) <> 2002-04-01 01:02:03.0) and ((ts + 1-2) > 2002-02-01 01:02:03.0) and ((ts + 1-2) < 2002-04-01 01:02:03.0) and (ts = (ts + 0-0)) and (ts <> (ts + 1-0)) and (ts <= (ts + 1-0)) and (ts < (ts + 1-0)) and (ts >= (ts - 1-0)) and (ts > (ts - 1-0))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) @@ -1249,7 +1497,7 @@ POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 PREHOOK: query: -- day to second expressions in predicate -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + 
interval '0 1:2:3' day to second @@ -1275,7 +1523,7 @@ where order by ts PREHOOK: type: QUERY POSTHOOK: query: -- day to second expressions in predicate -explain +explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second @@ -1300,6 +1548,10 @@ where and ts > dt - interval '0 1:2:4' day to second order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1312,17 +1564,47 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterTimestampScalarEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarNotEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterEqualTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampScalar[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampScalar[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampScalar[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampColumn[-1:boolean](DateColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampColumn[-1:boolean](DateColSubtractIntervalDayTimeScalar[6:timestamp])) + vectorized: true predicate: ((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000)) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000)) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0) and (ts = (dt + 0 01:02:03.000000000)) and (ts <> (dt + 0 
01:02:04.000000000)) and (ts <= (dt + 0 01:02:03.000000000)) and (ts < (dt + 0 01:02:04.000000000)) and (ts >= (dt - 0 01:02:03.000000000)) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) @@ -1395,7 +1677,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_2 #### A masked pattern was here #### 2001-01-01 01:02:03 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1420,7 +1702,7 @@ where and ts > ts - interval '1' day order by ts PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ts from vector_interval_2 where timestamp '2001-01-01 01:02:03' = ts + interval '0' day @@ -1445,6 +1727,10 @@ where and ts > ts - interval '1' day order by ts POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1457,17 +1743,47 @@ STAGE PLANS: alias: vector_interval_2 Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterTimestampScalarEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarLessTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampScalarGreaterEqualTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) 
FilterTimestampScalarGreaterTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampScalar[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampScalar[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampScalar[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColNotEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessEqualTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColLessTimestampColumn[-1:boolean](TimestampColAddIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterEqualTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp]) FilterTimestampColGreaterTimestampColumn[-1:boolean](TimestampColSubtractIntervalDayTimeScalar[6:timestamp])) + vectorized: true predicate: ((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000)) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000)) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0) and (ts = (ts + 0 00:00:00.000000000)) and (ts <> (ts + 1 00:00:00.000000000)) and (ts <= (ts + 1 00:00:00.000000000)) and (ts < (ts + 1 00:00:00.000000000)) and (ts >= (ts - 1 00:00:00.000000000)) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean) Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 394 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: timestamp)
diff --git ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out
index ff16b3b..e7a8e8b 100644
--- ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out
+++ ql/src/test/results/clientpositive/vector_interval_arithmetic.q.out
@@ -36,7 +36,7 @@ POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION [(unique_timestamps)
POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE [(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, comment:null), ]
tsval tsval
PREHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization detail
select
  dateval,
  dateval - interval '2-2' year to month,
@@ -49,7 +49,7 @@ from interval_arithmetic_1
order by dateval
PREHOOK: type: QUERY
POSTHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization detail
select
  dateval,
  dateval - interval '2-2' year to month,
@@ -62,6 +62,10 @@ from interval_arithmetic_1
order by dateval
POSTHOOK: type: QUERY
Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -76,13 +80,37 @@ STAGE PLANS:
          Select Operator
            expressions: dateval (type: date), (dateval - 2-2) (type: date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + -2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                nativeConditionsMet: Supported IS true
+                selectExpressions: IdentityExpression[0:date], DateColSubtractIntervalYearMonthScalar[2:long], DateColSubtractIntervalYearMonthScalar[3:long], DateColAddIntervalYearMonthScalar[4:long], DateColAddIntervalYearMonthScalar[5:long], IntervalYearMonthScalarAddDateColumn[6:long], IntervalYearMonthScalarAddDateColumn[7:long]
+                vectorized: true
            Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col0 (type: date)
              sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
+                  vectorized: true
              Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date)
      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+
usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date) @@ -179,7 +207,7 @@ dateval c1 c2 c3 c4 c5 c6 9075-06-13 9073-04-13 9077-08-13 9077-08-13 9073-04-13 9073-04-13 9077-08-13 9209-11-11 9207-09-11 9212-01-11 9212-01-11 9207-09-11 9207-09-11 9212-01-11 9403-01-09 9400-11-09 9405-03-09 9405-03-09 9400-11-09 9400-11-09 9405-03-09 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dateval, dateval - date '1999-06-07', @@ -188,7 +216,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dateval, dateval - date '1999-06-07', @@ -198,6 +226,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -212,13 +244,37 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), (dateval - 1999-06-07) (type: interval_day_time), (1999-06-07 - dateval) (type: interval_day_time), (dateval - dateval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColSubtractDateScalar[2:timestamp], DateScalarSubtractDateColumn[3:timestamp], DateColSubtractDateColumn[4:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) @@ -309,7 +365,7 @@ dateval c1 c2 c3 9075-06-13 2584462 00:00:00.000000000 -2584462 00:00:00.000000000 0 
00:00:00.000000000 9209-11-11 2633556 01:00:00.000000000 -2633556 01:00:00.000000000 0 00:00:00.000000000 9403-01-09 2704106 01:00:00.000000000 -2704106 01:00:00.000000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tsval, tsval - interval '2-2' year to month, @@ -321,7 +377,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tsval, tsval - interval '2-2' year to month, @@ -334,6 +390,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -348,13 +408,37 @@ STAGE PLANS: Select Operator expressions: tsval (type: timestamp), (tsval - 2-2) (type: timestamp), (tsval - -2-2) (type: timestamp), (tsval + 2-2) (type: timestamp), (tsval + -2-2) (type: timestamp), (-2-2 + tsval) (type: timestamp), (2-2 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:timestamp], TimestampColSubtractIntervalYearMonthScalar[2:timestamp], TimestampColSubtractIntervalYearMonthScalar[3:timestamp], TimestampColAddIntervalYearMonthScalar[4:timestamp], TimestampColAddIntervalYearMonthScalar[5:timestamp], IntervalYearMonthScalarAddTimestampColumn[6:timestamp], IntervalYearMonthScalarAddTimestampColumn[7:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) @@ -451,7 +535,7 @@ tsval c1 c2 c3 c4 c5 c6 9075-06-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9077-08-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9073-04-13 16:20:09.218517797 9077-08-13 
16:20:09.218517797 9209-11-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9212-01-11 04:08:58.223768453 9207-09-11 05:08:58.223768453 9207-09-11 05:08:58.223768453 9212-01-11 04:08:58.223768453 9403-01-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 9405-03-09 18:12:33.547 9400-11-09 18:12:33.547 9400-11-09 18:12:33.547 9405-03-09 18:12:33.547 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -459,7 +543,7 @@ from interval_arithmetic_1 order by interval '2-2' year to month + interval '3-3' year to month limit 2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select interval '2-2' year to month + interval '3-3' year to month, interval '2-2' year to month - interval '3-3' year to month @@ -468,6 +552,10 @@ order by interval '2-2' year to month + interval '3-3' year to month limit 2 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -480,12 +568,35 @@ STAGE PLANS: alias: interval_arithmetic_1 Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: 5-5 (type: interval_year_month), -1-1 (type: interval_year_month) @@ -530,7 +641,7 @@ c0 c1 5-5 -1-1 5-5 -1-1 PREHOOK: query: -- interval day-time arithmetic -explain +explain vectorization detail select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -543,7 +654,7 @@ from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY POSTHOOK: query: -- interval day-time arithmetic -explain +explain vectorization detail select dateval, dateval - interval '99 11:22:33.123456789' day to second, @@ -556,6 +667,10 @@ from interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage 
Stage-0 depends on stages: Stage-1 @@ -570,13 +685,37 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), (dateval - 99 11:22:33.123456789) (type: timestamp), (dateval - -99 11:22:33.123456789) (type: timestamp), (dateval + 99 11:22:33.123456789) (type: timestamp), (dateval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + dateval) (type: timestamp), (99 11:22:33.123456789 + dateval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], DateColSubtractIntervalDayTimeScalar[2:timestamp], DateColSubtractIntervalDayTimeScalar[3:timestamp], DateColAddIntervalDayTimeScalar[4:timestamp], DateColAddIntervalDayTimeScalar[5:timestamp], IntervalDayTimeScalarAddDateColumn[6:timestamp], IntervalDayTimeScalarAddDateColumn[7:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) @@ -673,7 +812,7 @@ dateval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9075-09-20 11:22:33.123456789 9075-03-05 11:37:26.876543211 9075-03-05 11:37:26.876543211 9075-09-20 11:22:33.123456789 9209-11-11 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9210-02-18 11:22:33.123456789 9209-08-03 13:37:26.876543211 9209-08-03 13:37:26.876543211 9210-02-18 11:22:33.123456789 9403-01-09 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 9403-04-18 12:22:33.123456789 9402-10-01 13:37:26.876543211 9402-10-01 13:37:26.876543211 9403-04-18 12:22:33.123456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dateval, tsval, @@ -683,7 +822,7 @@ select from interval_arithmetic_1 order by dateval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dateval, tsval, @@ -694,6 +833,10 @@ from 
interval_arithmetic_1 order by dateval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -708,13 +851,37 @@ STAGE PLANS: Select Operator expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:date], IdentityExpression[1:timestamp], DateColSubtractTimestampColumn[2:interval_day_time], TimestampColSubtractDateColumn[3:interval_day_time], TimestampColSubtractTimestampColumn[4:interval_day_time] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: timestamp), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) @@ -807,7 +974,7 @@ dateval tsval c2 c3 c4 9075-06-13 9075-06-13 16:20:09.218517797 -0 16:20:09.218517797 0 16:20:09.218517797 0 00:00:00.000000000 9209-11-11 9209-11-11 04:08:58.223768453 -0 04:08:58.223768453 0 04:08:58.223768453 0 00:00:00.000000000 9403-01-09 9403-01-09 18:12:33.547 -0 18:12:33.547000000 0 18:12:33.547000000 0 00:00:00.000000000 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -819,7 +986,7 @@ select from interval_arithmetic_1 order by tsval PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tsval, tsval - interval '99 11:22:33.123456789' day to second, @@ -832,6 +999,10 @@ from interval_arithmetic_1 order by tsval POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -846,13 +1017,37 @@ STAGE PLANS: Select Operator 
expressions: tsval (type: timestamp), (tsval - 99 11:22:33.123456789) (type: timestamp), (tsval - -99 11:22:33.123456789) (type: timestamp), (tsval + 99 11:22:33.123456789) (type: timestamp), (tsval + -99 11:22:33.123456789) (type: timestamp), (-99 11:22:33.123456789 + tsval) (type: timestamp), (99 11:22:33.123456789 + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:timestamp], TimestampColSubtractIntervalDayTimeScalar[2:timestamp], TimestampColSubtractIntervalDayTimeScalar[3:timestamp], TimestampColAddIntervalDayTimeScalar[4:timestamp], TimestampColAddIntervalDayTimeScalar[5:timestamp], IntervalDayTimeScalarAddTimestampColumn[6:timestamp], IntervalDayTimeScalarAddTimestampColumn[7:timestamp] + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp) @@ -949,14 +1144,14 @@ tsval _c1 _c2 _c3 _c4 _c5 _c6 9075-06-13 16:20:09.218517797 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9075-09-21 03:42:42.341974586 9075-03-06 03:57:36.095061008 9075-03-06 03:57:36.095061008 9075-09-21 03:42:42.341974586 9209-11-11 04:08:58.223768453 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9210-02-18 15:31:31.347225242 9209-08-03 17:46:25.100311664 9209-08-03 17:46:25.100311664 9210-02-18 15:31:31.347225242 9403-01-09 18:12:33.547 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 9403-04-19 06:35:06.670456789 9402-10-02 07:50:00.423543211 9402-10-02 07:50:00.423543211 9403-04-19 06:35:06.670456789 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second from interval_arithmetic_1 limit 2 PREHOOK: type: QUERY 
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select
  interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second,
  interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second
@@ -964,6 +1159,10 @@ from interval_arithmetic_1
limit 2
POSTHOOK: type: QUERY
Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
@@ -978,18 +1177,42 @@ STAGE PLANS:
          Select Operator
            expressions: 109 20:30:40.246913578 (type: interval_day_time), 89 02:14:26.000000000 (type: interval_day_time)
            outputColumnNames: _col0, _col1
+            Select Vectorization:
+                className: VectorSelectOperator
+                native: true
+                nativeConditionsMet: Supported IS true
+                selectExpressions: ConstantVectorExpression[2:interval_day_time], ConstantVectorExpression[3:interval_day_time]
+                vectorized: true
            Statistics: Num rows: 50 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 2
+              Limit Vectorization:
+                  className: VectorLimitOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  vectorized: true
              Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                    nativeConditionsNotMet: Supported IS false
+                    vectorized: true
                Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true

  Stage: Stage-0
    Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
index 13a1bac..e194122 100644
--- ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
+++ ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
@@ -136,7 +136,7 @@ POSTHOOK: Lineage: vectortab_b_1korc.si SIMPLE [(vectortab_b_1k)vectortab_b_1k.F
POSTHOOK: Lineage: vectortab_b_1korc.t SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab_b_1korc.ts SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab_b_1korc.ts2 SIMPLE [(vectortab_b_1k)vectortab_b_1k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select
  v1.s,
  v2.s,
@@ -158,7 +158,7 @@ join
on v1.intrvl1 = v2.intrvl2
  and v1.s = v2.s
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select
  v1.s,
  v2.s,
@@ -180,6 +180,10 @@ join
on v1.intrvl1 = v2.intrvl2
  and v1.s = v2.s
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
  Stage-4 is a root stage
  Stage-3 depends on stages: Stage-4
@@ -216,11 +220,23 @@ STAGE PLANS:
          alias: vectortab_a_1korc
          Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
+            Filter Vectorization:
+                className: VectorFilterOperator
+                native: true
+                nativeConditionsMet: Supported IS true
+                predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] SelectColumnIsNotNull[-1:boolean](DateColSubtractDateColumn[14:timestamp](CastTimestampToDate[13:date])))
+                vectorized: true
            predicate: (s is not null and (dt - CAST( ts AS DATE)) is not null) (type: boolean)
            Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  nativeConditionsMet: Supported IS true
+                  selectExpressions: IdentityExpression[8:string], DateColSubtractDateColumn[14:timestamp](CastTimestampToDate[13:date])
+                  vectorized: true
              Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
                condition map:
@@ -228,20 +244,45 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: string), _col1 (type: interval_day_time)
                  1 _col0 (type: string), _col1 (type: interval_day_time)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    vectorized: true
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time)
                  outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      nativeConditionsMet: Supported IS true
+                      selectExpressions: IdentityExpression[0:string], IdentityExpression[2:string], IdentityExpression[1:interval_day_time]
+                      vectorized: true
                  Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                        nativeConditionsNotMet: Supported IS false
+                        vectorized: true
                    Statistics: Num rows: 1100 Data size: 506290 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
      Local Work:
        Map Reduce Local Work
diff --git ql/src/test/results/clientpositive/vector_join30.q.out ql/src/test/results/clientpositive/vector_join30.q.out
index 45ed894..198d9f7 100644
--- ql/src/test/results/clientpositive/vector_join30.q.out
+++ ql/src/test/results/clientpositive/vector_join30.q.out
@@ -14,7 +14,7 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@orcsrc
POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -22,7 +22,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -30,6 +30,10 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 @@ -50,17 +54,47 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -113,6 +147,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -124,6 +162,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: 
org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -175,6 +221,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -222,18 +272,48 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -271,7 +351,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -279,7 +359,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -287,6 +367,10 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2 @@ -307,12 +391,36 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -365,6 +473,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -376,6 +488,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -436,13 +556,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No 
TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -480,7 +624,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -488,7 +632,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -496,6 +640,10 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2 @@ -516,12 +664,36 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -574,6 +746,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -585,6 +761,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -645,13 +829,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -689,7 +897,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 103231310608 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -700,7 +908,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -711,6 +919,10 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-9 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-10, Stage-11, Stage-12, Stage-2 @@ -734,17 +946,47 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter 
Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -810,6 +1052,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -821,6 +1067,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -885,6 +1139,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -938,6 +1196,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -993,18 +1255,48 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1024,18 +1316,48 @@ STAGE PLANS: alias: orcsrc Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1079,7 +1401,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -1090,7 +1412,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -1101,6 +1423,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2 @@ -1124,12 +1450,36 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -1195,6 +1545,10 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1206,6 +1560,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1270,6 +1632,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1327,13 +1693,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1355,13 +1745,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 
Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1405,7 +1819,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1416,7 +1830,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1427,6 +1841,10 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 @@ -1448,12 +1866,36 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -1519,6 +1961,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1530,6 +1976,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1598,13 +2052,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1626,13 +2104,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1676,7 +2178,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1687,7 +2189,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1698,6 +2200,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 @@ -1719,12 +2225,36 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -1790,6 +2320,10 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -1801,6 +2335,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1869,13 +2411,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -1897,13 +2463,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -1947,7 +2537,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### 348019368476 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1958,7 +2548,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1969,6 +2559,10 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 @@ -1990,12 +2584,36 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -2061,6 +2679,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + 
enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -2072,6 +2694,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -2140,13 +2770,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -2168,13 +2822,37 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col0 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/vector_join_part_col_char.q.out index 6f6efc6..907a23f 100644 --- ql/src/test/results/clientpositive/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/vector_join_part_col_char.q.out @@ -97,10 +97,14 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@char_tbl2 gpa=3 gpa=3.5 -PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY -POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -108,55 +112,16 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: c1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string), age (type: int), gpa (type: char(50)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: char(50)) - sort order: + - Map-reduce partition columns: _col2 (type: char(50)) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: int) - TableScan - alias: c2 - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string), age (type: int), gpa (type: char(5)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: char(50)) - sort order: + - Map-reduce partition columns: _col2 (type: char(50)) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: char(50)) - 1 _col2 (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - table: 
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 644b0dc..340913f 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,6 +16,10 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -26,10 +30,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:cd - Fetch Operator limit: -1 $hdt$_2:hd - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:cd @@ -59,60 +61,24 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: 
mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select count(*) from (select c.ctinyint from alltypesorc c diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out index 67725bd..17be8bc 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -80,12 +80,16 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -169,12 +173,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -258,12 +266,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -302,6 +314,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -312,20 +330,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -348,12 +391,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -392,6 +439,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: 
VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -402,20 +455,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -438,12 +516,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -482,6 +564,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -492,20 +580,45 @@ 
STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -528,12 +641,16 @@ POSTHOOK: Input: default@tjoin2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -572,6 +689,12 @@ STAGE PLANS: Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -582,20 +705,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS 
true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorized: true outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], IdentityExpression[2:int], IdentityExpression[3:char(2)] + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out index 485e352..44138b5 100644 --- ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_leftsemi_mapjoin.q.out @@ -132,85 +132,17 @@ POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) +PREHOOK: query: explain vectorization only summary - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num 
rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -228,85 +160,15 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 
key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -326,85 +188,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select 
Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -416,89 +208,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col1 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: 
Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -521,182 +239,38 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t3 - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
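The vector_leftsemi_mapjoin.q.out hunks in this region swap full operator trees for the new PLAN VECTORIZATION summary block. A minimal HiveQL sketch of how such an expectation is produced, assuming the t1/t2 test tables created earlier in the q file:

  -- With vectorization disabled, the summary-only explain prints just the
  -- block seen in the expected output:
  --   PLAN VECTORIZATION:
  --     enabled: false
  --     enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
  SET hive.vectorized.execution.enabled=false;
  EXPLAIN VECTORIZATION ONLY SUMMARY
  SELECT * FROM t1 a LEFT SEMI JOIN t2 b ON a.key = b.key SORT BY a.key, a.value;

As the diffs themselves show, ONLY suppresses the regular plan detail, SUMMARY reports vectorization state at the plan, map, and reduce level, and DETAIL (used in the vector_left_outer_join2.q.out hunks above) additionally prints per-operator blocks such as Select Vectorization and Map Join Vectorization.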
- - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -711,89 +285,15 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t2 - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -805,85 +305,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t1 - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -900,85 +330,15 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-3 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -1005,206 +365,43 @@ POSTHOOK: Input: default@t3 4 5 5 -5 -8 -8 -9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t1 a left semi join t2 b on 
a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num 
rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +5 +8 +8 +9 +PREHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -1232,85 +429,15 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 
key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -1336,112 +463,15 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key 
(type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -1469,94 +499,15 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink 
Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -1591,101 +542,20 @@ POSTHOOK: Input: default@t3 10 10 10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 
Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -1728,94 +598,15 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -1858,94 +649,15 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -1990,96 +702,15 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 
_col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -2107,132 +738,43 @@ POSTHOOK: Input: default@t3 0 0 0 -0 -0 -0 -0 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key 
(type: int) - 1 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 value (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +0 +0 +0 +0 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -2281,80 +823,15 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - 
Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -2366,10 +843,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2380,7 +863,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -2406,45 +888,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -2462,10 +910,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2476,7 +930,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -2502,45 +955,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -2560,10 +979,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left 
semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2574,7 +999,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -2600,45 +1024,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -2650,10 +1040,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2664,7 +1060,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -2690,49 +1085,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -2755,10 +1112,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2769,7 +1132,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -2795,45 +1157,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - 
compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -2848,10 +1176,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2862,7 +1196,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b:t3 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b:t3 @@ -2888,49 +1221,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -2945,10 +1240,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization summary +select a.value 
from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization summary +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -2959,7 +1260,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b:t2 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b:t2 @@ -2985,49 +1285,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -3039,10 +1301,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage 
Stage-2 depends on stages: Stage-5 @@ -3053,7 +1321,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b:t1 @@ -3079,45 +1346,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -3134,10 +1367,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -3148,7 +1387,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -3174,45 +1412,11 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator 
- expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -3243,10 +1447,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -3257,7 +1467,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -3283,45 +1492,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -3337,10 +1512,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY 
-POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -3351,10 +1532,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 c - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -3393,52 +1572,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -3466,10 +1604,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization summary +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on 
stages: Stage-5 @@ -3480,7 +1624,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -3506,45 +1649,11 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -3570,10 +1679,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization summary +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -3584,10 +1699,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: b - Fetch Operator limit: -1 c - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: b @@ -3635,47 +1748,11 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE 
Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Local Work:
         Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -3703,10 +1780,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -3717,10 +1800,8 @@ STAGE PLANS:
     Map Reduce Local Work
       Alias -> Map Local Tables:
         b 
-          Fetch Operator
            limit: -1
         c 
-          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
         b 
@@ -3749,48 +1830,15 @@ STAGE PLANS:
             keys:
               0 key (type: int)
               1 key (type: int)
-              2 _col0 (type: int)
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Map Join Operator
-              condition map:
-                   Left Outer Join0 to 1
-                   Left Semi Join 1 to 2
-              keys:
-                0 key (type: int)
-                1 key (type: int)
-                2 _col0 (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
+              2 _col0 (type: int)
+
+  Stage: Stage-2
+    Map Reduce
       Local Work:
         Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key
 sort by a.key
 PREHOOK: type: QUERY
@@ -3830,10 +1878,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -3842,84 +1896,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: b
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Outer Join 0 to 1
-               Left Semi Join 1 to 2
-          keys:
-            0 key (type: int)
-            1 key (type: int)
-            2 _col0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -3962,10 +1944,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -3976,10 +1964,8 @@ STAGE PLANS:
     Map Reduce Local Work
       Alias -> Map Local Tables:
         b 
-          Fetch Operator
            limit: -1
         c 
-          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
         b 
@@ -4012,44 +1998,11 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Map Join Operator
-              condition map:
-                   Left Semi Join 0 to 1
-                   Left Outer Join0 to 2
-              keys:
-                0 key (type: int)
-                1 _col0 (type: int)
-                2 key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Local Work:
         Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -4092,10 +2045,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -4106,10 +2065,8 @@ STAGE PLANS:
     Map Reduce Local Work
       Alias -> Map Local Tables:
         a 
-          Fetch Operator
            limit: -1
         b 
-          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
         a 
@@ -4142,44 +2099,11 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Map Join Operator
-              condition map:
-                   Left Semi Join 0 to 1
-                   Right Outer Join0 to 2
-              keys:
-                0 key (type: int)
-                1 _col0 (type: int)
-                2 key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
       Local Work:
        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -4224,10 +2148,16 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -4236,84 +2166,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Semi Join 0 to 1
-               Outer Join 0 to 2
-          keys:
-            0 key (type: int)
-            1 _col0 (type: int)
-            2 key (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -4369,10 +2227,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-8 is a root stage
   Stage-3 depends on stages: Stage-8
@@ -4383,10 +2247,8 @@ STAGE PLANS:
     Map Reduce Local Work
       Alias -> Map Local Tables:
         b 
-          Fetch Operator
            limit: -1
         c 
-          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
         b 
@@ -4420,53 +2282,11 @@ STAGE PLANS:
   Stage: Stage-3
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Left Outer Join0 to 1
-                  keys:
-                    0 _col1 (type: string)
-                    1 value (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
      Local Work:
        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
@@ -4515,10 +2335,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization summary
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -4529,7 +2355,6 @@ STAGE PLANS:
     Map Reduce Local Work
       Alias -> Map Local Tables:
         $hdt$_1:b 
-          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
         $hdt$_1:b 
@@ -4555,40 +2380,11 @@ STAGE PLANS:
   Stage: Stage-3
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((key > 100) and value is not null) (type: boolean)
-              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Left Semi Join 0 to 1
-                  keys:
-                    0 _col1 (type: string)
-                    1 _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Local Work:
         Map Reduce Local Work
 
   Stage: Stage-0
-    Fetch Operator
       limit: -1
-      Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
@@ -4600,10 +2396,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -4612,74 +2414,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -4697,10 +2438,16 @@ POSTHOOK: Input: default@t2
 10 val_10
 4 val_4
 8 val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -4709,74 +2456,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -4796,10 +2482,16 @@ POSTHOOK: Input: default@t2
 10 val_5
 4 val_2
 8 val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -4808,169 +2500,49 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t4
 #### A masked pattern was here ####
-POSTHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t4
-#### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-2 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-5
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key < 15) (type: boolean)
-              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col1 (type: int), _col1 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col1 (type: int)
+POSTHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t4
+#### A masked pattern was here ####
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col1 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
@@ -4993,10 +2565,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -5005,74 +2583,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((value < 'val_10') and key is not null) (type: boolean)
-              Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5087,10 +2604,16 @@ POSTHOOK: Input: default@t2
 0 val_0
 0 val_0
 0 val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -5099,78 +2622,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b:t3 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b:t3 
-          TableScan
-            alias: t3
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key > 5) (type: boolean)
-              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
@@ -5185,10 +2643,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -5197,78 +2661,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b:t2 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b:t2 
-          TableScan
-            alias: t2
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
-              Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
@@ -5280,10 +2679,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -5292,74 +2697,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b:t1 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b:t1 
-          TableScan
-            alias: t1
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key > 2) (type: boolean)
-              Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5376,10 +2720,16 @@ POSTHOOK: Input: default@t2
 10 val_5
 4 val_2
 8 val_4
-PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-1 depends on stages: Stage-3
@@ -5388,74 +2738,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-3
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
@@ -5480,92 +2769,37 @@ POSTHOOK: Input: default@t3
 2
 4
 4
-5
-5
-5
-8
-8
-9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-2 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-5
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (2 * key) is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 (2 * _col0) (type: int)
+5
+5
+5
+8
+8
+9
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 (2 * _col0) (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5581,10 +2815,16 @@ POSTHOOK: Input: default@t2
 0 val_0
 0 val_0
 8 val_8
-PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -5593,97 +2833,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-6
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        a 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                  2 _col0 (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 key (type: int)
-                      2 _col0 (type: int)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Left Semi Join 1 to 2
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                  2 _col0 (type: int)
-                outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: string)
-                    sort order: ++
-                    Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: int), _col3 (type: string)
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5711,10 +2867,16 @@ POSTHOOK: Input: default@t3
 10 val_10 10 val_5
 4 val_4 4 val_2
 8 val_8 8 val_4
-PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
  Stage-2 depends on stages: Stage-5
@@ -5723,74 +2885,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int), value (type: string)
-                      1 _col0 (type: int), _col1 (type: string)
 
   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int), value (type: string)
-                  1 _col0 (type: int), _col1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -5816,10 +2917,16 @@ POSTHOOK: Input: default@t3
 5 val_5
 8 val_8
 9 val_9
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-1 depends on stages: Stage-4
@@ -5828,101 +2935,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-4
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
-                      2 _col0 (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
-                      2 _col0 (type: int)
 
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Left Semi Join 0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 _col0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink
 
 PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -5950,10 +2969,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2
c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -5962,83 +2987,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6078,96 +3033,31 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 
Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6210,10 +3100,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer 
join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -6222,83 +3118,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6341,10 +3167,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join 
t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -6353,83 +3185,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6474,10 +3236,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -6486,84 +3254,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -6619,10 +3316,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on 
a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -6631,93 +3334,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-8 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 value (type: string) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -6766,10 +3389,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: 
QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -6778,160 +3407,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: 
Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -6949,10 +3467,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi 
join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -6961,74 +3485,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -7048,10 +3511,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7060,74 +3529,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - 
Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -7139,10 +3547,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7151,78 +3565,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data 
size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col1 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -7245,10 +3594,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7257,74 +3612,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -7339,10 +3633,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7351,78 +3651,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t3 - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data 
size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -7437,10 +3672,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7449,78 +3690,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t2 - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - 
mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -7532,10 +3708,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7544,74 +3726,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t1 - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: 
int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -7628,10 +3749,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -7640,74 +3767,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - 
Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -7738,10 +3804,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -7750,74 +3822,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic 
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -7833,10 +3844,16 @@ POSTHOOK: Input: default@t2
 0	val_0
 0	val_0
 8	val_8
-PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -7845,97 +3862,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-6
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        a 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                  2 _col0 (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 key (type: int)
-                      2 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Left Semi Join 1 to 2
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                  2 _col0 (type: int)
-                outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: string)
-                    sort order: ++
-                    Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: int), _col3 (type: string)
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -7963,10 +3896,16 @@ POSTHOOK: Input: default@t3
 10	val_10	10	val_5
 4	val_4	4	val_2
 8	val_8	8	val_4
-PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -7975,74 +3914,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int), value (type: string)
-                      1 _col0 (type: int), _col1 (type: string)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int), value (type: string)
-                  1 _col0 (type: int), _col1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -8068,113 +3946,31 @@ POSTHOOK: Input: default@t3
 5	val_5
 8	val_8
 9	val_9
-PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-1 depends on stages: Stage-4
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-4
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
-                      2 _col0 (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
-                      2 _col0 (type: int)
+POSTHOOK: query: explain vectorization only detail
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work

   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Left Semi Join 0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 _col0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -8202,10 +3998,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -8214,83 +4016,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-6
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            HashTable Sink Operator
-              keys:
-                0 key (type: int)
-                1 key (type: int)
-                2 _col0 (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 key (type: int)
-                    1 key (type: int)
-                    2 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Map Join Operator
-              condition map:
-                   Left Outer Join0 to 1
-                   Left Semi Join 1 to 2
-              keys:
-                0 key (type: int)
-                1 key (type: int)
-                2 _col0 (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -8330,10 +4062,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -8342,84 +4080,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: b
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Outer Join 0 to 1
-               Left Semi Join 1 to 2
-          keys:
-            0 key (type: int)
-            1 key (type: int)
-            2 _col0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -8462,10 +4129,16 @@ POSTHOOK: Input: default@t3
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -8474,83 +4147,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-6
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 key (type: int)
-                    1 _col0 (type: int)
-                    2 key (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            HashTable Sink Operator
-              keys:
-                0 key (type: int)
-                1 _col0 (type: int)
-                2 key (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Map Join Operator
-              condition map:
-                   Left Semi Join 0 to 1
-                   Left Outer Join0 to 2
-              keys:
-                0 key (type: int)
-                1 _col0 (type: int)
-                2 key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -8593,10 +4196,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-6 is a root stage
   Stage-2 depends on stages: Stage-6
@@ -8605,83 +4214,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-6
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        a 
-          Fetch Operator
-            limit: -1
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            HashTable Sink Operator
-              keys:
-                0 key (type: int)
-                1 _col0 (type: int)
-                2 key (type: int)
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 key (type: int)
-                    1 _col0 (type: int)
-                    2 key (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Map Join Operator
-              condition map:
-                   Left Semi Join 0 to 1
-                   Right Outer Join0 to 2
-              keys:
-                0 key (type: int)
-                1 _col0 (type: int)
-                2 key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -8726,10 +4265,16 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -8738,84 +4283,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: int)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: int)
-              sort order: +
-              Map-reduce partition columns: key (type: int)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Semi Join 0 to 1
-               Outer Join 0 to 2
-          keys:
-            0 key (type: int)
-            1 _col0 (type: int)
-            2 key (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -8865,111 +4339,37 @@ POSTHOOK: Input: default@t3
 5
 5
 5
-8
-8
-9
-NULL
-NULL
-NULL
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-8 is a root stage
-  Stage-3 depends on stages: Stage-8
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-8
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-        c 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)
-        c 
-          TableScan
-            alias: c
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            HashTable Sink Operator
-              keys:
-                0 _col1 (type: string)
-                1 value (type: string)
+8
+8
+9
+NULL
+NULL
+NULL
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-3 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work

   Stage: Stage-3
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Left Outer Join0 to 1
-                  keys:
-                    0 _col1 (type: string)
-                    1 value (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int)
-          outputColumnNames: _col0
-          Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
@@ -9018,10 +4418,16 @@ POSTHOOK: Input: default@t3
 4
 8
 8
-PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+PREHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
+POSTHOOK: query: explain vectorization only detail
+select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-3 depends on stages: Stage-4
@@ -9030,69 +4436,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-4
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_1:b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_1:b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: value is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: value (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 _col1 (type: string)
-                      1 _col0 (type: string)

   Stage: Stage-3
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((key > 100) and value is not null) (type: boolean)
-              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Left Semi Join 0 to 1
-                  keys:
-                    0 _col1 (type: string)
-                    1 _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
@@ -9104,10 +4454,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9116,74 +4472,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9201,10 +4496,16 @@ POSTHOOK: Input: default@t2
 10	val_10
 4	val_4
 8	val_8
-PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9213,74 +4514,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9300,10 +4540,16 @@ POSTHOOK: Input: default@t2
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9312,74 +4558,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9391,10 +4576,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t4
 #### A masked pattern was here ####
-PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9403,78 +4594,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key < 15) (type: boolean)
-              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col1 (type: int), _col1 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col1 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col1 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
@@ -9497,10 +4623,16 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: query: explain vectorization only detail
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9509,74 +4641,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((value < 'val_10') and key is not null) (type: boolean)
-              Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)

   Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+    Map Reduce

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
@@ -9591,10 +4662,16 @@ POSTHOOK: Input: default@t2
 0	val_0
 0	val_0
 0	val_0
-PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9603,78 +4680,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b:t3 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b:t3 
-          TableScan
-            alias: t3
-            Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (key > 5) (type: boolean)
-              Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
-      limit: -1
       Processor Tree:
-        ListSink

 PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
@@ -9689,10 +4701,16 @@ POSTHOOK: Input: default@t3
 val_10
 val_8
 val_9
-PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: query: explain vectorization only detail
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
   Stage-2 depends on stages: Stage-5
@@ -9701,78 +4719,13 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b:t2 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b:t2 
-          TableScan
-            alias: t2
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
-              Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
-                  HashTable Sink Operator
-                    keys:
-                      0 key (type: int)
-                      1 _col0 (type: int)

   Stage: Stage-2
     Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE
size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -9784,10 +4737,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -9796,74 +4755,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t1 - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -9880,10 +4778,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -9892,74 +4796,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by 
a.key PREHOOK: type: QUERY @@ -9990,10 +4833,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -10002,192 +4851,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -0 val_0 -0 
val_0 -0 val_0 -8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + Stage: Stage-0 + Fetch Operator + Processor Tree: + +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -10215,10 +4925,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -10227,74 +4943,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - 
keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -10320,10 +4975,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -10332,101 +4993,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column 
stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -10454,10 +5027,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -10466,83 +5045,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num 
rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -10582,10 +5091,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -10594,84 +5109,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - 
Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -10710,99 +5154,35 @@ POSTHOOK: Input: default@t3 4 4 8 -8 -NULL -NULL -NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) +8 +NULL +NULL +NULL +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi 
join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -10845,10 +5225,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -10857,83 +5243,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: 
NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -10978,10 +5294,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -10990,84 +5312,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: 
NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -11123,10 +5374,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -11135,93 +5392,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-8 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - c - TableScan - 
alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 value (type: string) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -11270,10 +5447,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -11282,69 +5465,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: 
string) - 1 _col0 (type: string) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY @@ -11356,10 +5483,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11368,74 +5501,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -11453,10 +5525,16 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11465,74 +5543,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -11552,10 +5569,16 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11564,74 +5587,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY @@ -11643,10 +5605,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11655,78 +5623,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col1 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: 
Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY @@ -11749,10 +5652,16 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11761,74 +5670,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY @@ -11843,10 
+5691,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11855,78 +5709,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t3 - TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 5) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY @@ -11941,10 +5730,16 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain select a.value from t1 a left semi join 
(select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization only detail +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -11953,78 +5748,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t2 - TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY @@ -12036,10 +5766,16 @@ 
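Every hunk in this golden file makes the same mechanical change: the recorded query flips from plain explain to explain vectorization only detail, a PLAN VECTORIZATION header is prepended to the expected output, and the operator-tree lines that are not vectorization-specific drop out (the ONLY keyword suppresses them). A minimal sketch of the statement shape for the query in this hunk; the keyword grammar is assumed from the rewritten queries in this diff rather than confirmed against the parser change:

    -- Assumed grammar: EXPLAIN VECTORIZATION [ONLY] [DETAIL];
    -- ONLY hides the non-vectorization plan, DETAIL is the most verbose level.
    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION ONLY DETAIL
    SELECT a.value
    FROM t1 a
    LEFT SEMI JOIN (SELECT key, value FROM t2 WHERE key > 5) b
      ON a.key = b.key AND b.value <= 'val_20'
    SORT BY a.value;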
POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -12048,74 +5784,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b:t1 - TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12132,86 +5807,31 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a 
left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-3 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY @@ -12242,10 +5862,16 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value 
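The PLAN VECTORIZATION header that replaces these plans carries two fields, enabled and enabledConditionsMet; on this MR run the only gate reported is hive.vectorized.execution.enabled IS true. A sketch of a session that would reproduce the header for the query in this hunk, using only the setting named in the output:

    -- Setting taken verbatim from the enabledConditionsMet line; everything
    -- else is left at defaults.
    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION ONLY DETAIL
    SELECT *
    FROM t1 a
    LEFT SEMI JOIN t2 b ON a.key = 2 * b.key
    SORT BY a.key, a.value;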
+POSTHOOK: query: explain vectorization only detail +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -12254,74 +5880,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12337,10 +5902,16 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -12349,97 +5920,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY @@ -12467,10 +5954,16 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain select * from t3 a left semi join t1 
b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization only detail +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -12479,74 +5972,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY @@ -12572,10 +6004,16 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key 
sort by a.key +PREHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -12584,101 +6022,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -12698,103 +6048,39 @@ POSTHOOK: Input: default@t3 0 0 0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: 
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -12834,10 +6120,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -12846,84 +6138,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -12966,10 +6187,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -12978,83 +6205,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13097,10 +6254,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -13109,83 +6272,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key 
from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13230,10 +6323,16 @@ NULL NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -13242,84 +6341,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 4501 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -13375,10 +6403,16 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -13387,93 +6421,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-8 Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 _col0 (type: int) - c - TableScan - alias: c - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 value (type: string) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2250 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 26 Data size: 2475 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -13522,10 +6476,16 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: query: explain vectorization only detail +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -13534,69 +6494,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 2046 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY diff --git 
ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index 160b088..8bfa285 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 @@ -54,6 +58,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -107,6 +119,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -147,6 +163,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -242,6 +262,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work @@ -276,19 +300,23 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from 
lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain +explain vectorization detail select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 @@ -321,6 +349,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -374,6 +410,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -414,6 +454,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work @@ -509,6 +553,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out index 8c1acc8..bc3a885 100644 --- ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out @@ -211,7 +211,7 @@ POSTHOOK: Output: default@store PREHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. 
-explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -226,7 +226,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with -- the 2 TableScanOperators that have different schema. -explain select +explain vectorization select s_state, count(1) from store_sales, store, @@ -238,6 +238,10 @@ explain select order by s_state limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -248,160 +252,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int) - TableScan - alias: store - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: s_store_sk (type: int), s_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: string) - TableScan - alias: date_dim - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num 
rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col3 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 100 - Processor Tree: - ListSink diff --git ql/src/test/results/clientpositive/vector_multi_insert.q.out ql/src/test/results/clientpositive/vector_multi_insert.q.out index e9f106d..063a245 100644 --- ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc1 POSTHOOK: Output: default@orc1 -PREHOOK: query: explain from orc1 a +PREHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 PREHOOK: type: QUERY -POSTHOOK: query: explain from orc1 a +POSTHOOK: query: explain vectorization from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 insert overwrite table orc_rn3 select a.* where a.rn >= 1000 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 @@ -102,56 +106,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (rn < 100) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rn (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn1 - Filter Operator - predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rn (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - Filter Operator - predicate: (rn >= 1000) (type: boolean) - 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rn (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-9 Conditional Operator @@ -177,27 +140,9 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn1 Stage: Stage-7 Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn1 Stage: Stage-8 Move Operator @@ -229,27 +174,9 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 Stage: Stage-13 Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 Stage: Stage-14 Move Operator @@ -281,27 +208,9 @@ STAGE PLANS: Stage: Stage-17 Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 Stage: Stage-19 Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 Stage: Stage-20 Move Operator diff --git ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out index 8845cb2..fca7039 100644 --- ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out +++ ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +PREHOOK: query: explain vectorization 
SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9,28 +13,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint) IN (ctinyint, cbigint) (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. 
Column type is int but the common type is bigint + vectorized: false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink diff --git ql/src/test/results/clientpositive/vector_non_string_partition.q.out ql/src/test/results/clientpositive/vector_non_string_partition.q.out index 11677b7..ff54040 100644 --- ql/src/test/results/clientpositive/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/vector_non_string_partition.q.out @@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@non_string_part ctinyint=__HIVE_DEFAULT_PARTITION__ -PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -38,45 +42,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: non_string_part - Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), ctinyint (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: tinyint) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY @@ -98,10 +79,14 @@ POSTHOOK: Input: 
default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ 799471 NULL 1248059 NULL 1286921 NULL -PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -109,44 +94,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: non_string_part - Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_null_projection.q.out ql/src/test/results/clientpositive/vector_null_projection.q.out index 779f787..7dd2491 100644 --- ql/src/test/results/clientpositive/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/vector_null_projection.q.out @@ -37,13 +37,17 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@b POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- We 
expect no vectorization due to NULL (void) projection type. -explain +explain vectorization detail select NULL from a PREHOOK: type: QUERY POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. -explain +explain vectorization detail select NULL from a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -66,6 +70,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -82,12 +92,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@a #### A masked pattern was here #### NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select NULL as x from a union distinct select NULL as x from b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select NULL as x from a union distinct select NULL as x from b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -134,6 +148,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: void) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: void) diff --git ql/src/test/results/clientpositive/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/vector_nullsafe_join.q.out index 2090c24..927ee70 100644 --- ql/src/test/results/clientpositive/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/vector_nullsafe_join.q.out @@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -64,7 +68,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ 
-78,39 +81,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 value (type: int) - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY @@ -131,10 +115,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -145,10 +133,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -178,43 +164,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 value (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY @@ -226,10 +189,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -240,10 +207,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -267,41 +232,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 value (type: int) - 2 key (type: int) - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY @@ -340,10 +284,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL 
NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -354,10 +302,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -387,44 +333,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int), value (type: int) - 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) - nullSafes: [true, false] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY @@ -436,10 +358,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a 
join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -450,10 +376,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -477,41 +401,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int), value (type: int) - 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) - nullSafes: [true, true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY @@ -629,11 +532,15 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL PREHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain select * from myinput1 a join myinput1 b on a.key<=>b.value +explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -644,7 +551,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -658,39 +564,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 value (type: int) - 
nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY @@ -711,10 +598,14 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -725,10 +616,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -758,43 +647,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 value (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY @@ -806,10 +672,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -820,10 +690,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -847,41 +715,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 value (type: int) - 2 key (type: int) - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY @@ -920,10 +767,14 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b 
on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -934,10 +785,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -967,44 +816,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int), value (type: int) - 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) - nullSafes: [true, false] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY @@ -1016,10 +841,14 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain vectorization select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND 
a.value<=>c.value POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1030,10 +859,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: a - Fetch Operator limit: -1 b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: a @@ -1057,41 +884,20 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int), value (type: int) - 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) - nullSafes: [true, true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/vector_number_compare_projection.q.out index 4f1a26c..f538255 100644 --- ql/src/test/results/clientpositive/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/vector_number_compare_projection.q.out @@ -103,7 +103,7 @@ scratch.t scratch.si scratch.i scratch.b scratch.f scratch.d scratch.dc PREHOOK: query: -- -- Projection LongColLongScalar -- -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q @@ -111,12 +111,16 @@ PREHOOK: type: QUERY POSTHOOK: query: -- -- Projection LongColLongScalar -- -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,13 +135,37 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:smallint], IdentityExpression[2:int], LongColLessLongScalar[7:long], LongColLessEqualLongScalar[8:long], LongColEqualLongScalar[9:long] + vectorized: true Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) @@ -176,17 +204,21 @@ POSTHOOK: Input: default@vectortab2k_orc #### A masked pattern was here #### c0 -3601806268 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -201,13 +233,37 @@ STAGE PLANS: Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[1:smallint], IdentityExpression[2:int], IdentityExpression[3:bigint], LongColGreaterLongScalar[7:long], LongColGreaterEqualLongScalar[8:long], LongColNotEqualLongScalar[9:long], LongColGreaterLongScalar[10:long] + vectorized: true Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) diff --git ql/src/test/results/clientpositive/vector_nvl.q.out ql/src/test/results/clientpositive/vector_nvl.q.out index 8330810..1c10478 100644 --- ql/src/test/results/clientpositive/vector_nvl.q.out +++ ql/src/test/results/clientpositive/vector_nvl.q.out @@ -1,13 +1,17 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc WHERE (cdouble IS NULL) LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -15,34 +19,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cdouble is null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: null (type: double), 100.0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, nvl(cdouble, 100) as n FROM alltypesorc @@ -68,14 +56,18 @@ NULL 100.0 NULL 100.0 NULL 100.0 NULL 100.0 -PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -83,31 +75,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float), NVL(cfloat,1) (type: float) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, nvl(cfloat, 1) as n FROM alltypesorc @@ -131,33 +110,24 @@ NULL 1.0 27.0 27.0 -11.0 -11.0 61.0 61.0 -PREHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, 10) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, 10) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 10 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - ListSink PREHOOK: query: SELECT nvl(null, 10) as n FROM alltypesorc @@ -181,33 +151,24 @@ POSTHOOK: Input: default@alltypesorc 10 10 10 -PREHOOK: query: EXPLAIN SELECT nvl(null, null) as n +PREHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 
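As the vector_nvl hunks show, this summary-level EXPLAIN VECTORIZATION replaces the operator trees with per-stage vectorization sections; for plans that reduce to a single Fetch stage (the constant-folded nvl(null, 10) and nvl(null, null) queries) only the PLAN VECTORIZATION header and the stage limit survive. A sketch of that shape, assuming the same session settings as the tests:

    EXPLAIN VECTORIZATION
    SELECT nvl(null, 10) AS n FROM alltypesorc LIMIT 10;
    -- expected shape of the output (abridged):
    --   PLAN VECTORIZATION:
    --     enabled: true
    --     enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
    --   Stage: Stage-0
    --     limit: 10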
PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT nvl(null, null) as n +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT nvl(null, null) as n FROM alltypesorc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - ListSink PREHOOK: query: SELECT nvl(null, null) as n FROM alltypesorc diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out index 4840b41..5f4429e 100644 --- ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -101,12 +101,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -122,9 +126,23 @@ STAGE PLANS: Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[7:boolean], IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[7:boolean] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -133,9 +151,27 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true 
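The DETAIL output in vector_orderby_5 names the concrete aggregate implementation (VectorUDAFMaxLong over a long-category key/value pair). By analogy — an assumed name, not output reproduced from this patch — min(b) on the same table should be reported as VectorUDAFMinLong:

    EXPLAIN VECTORIZATION DETAIL
    SELECT bo, min(b) FROM vectortab2korc GROUP BY bo ORDER BY bo DESC;

Note also the pattern that recurs throughout these MR-based golden files: the reduce sink's native conditions include "hive.execution.engine mr IN [tez, spark] IS false", so under MR the VectorReduceSinkOperator is reported native: false and every Reduce Vectorization section stays enabled: false even though hive.vectorized.execution.reduce.enabled is true.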
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -159,6 +195,14 @@ STAGE PLANS: sort order: - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_outer_join0.q.out ql/src/test/results/clientpositive/vector_outer_join0.q.out index 68134f2..d21ff0b 100644 --- ql/src/test/results/clientpositive/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/vector_outer_join0.q.out @@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2 4 FOUR NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -76,58 +80,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:t2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:t2 - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column 
stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -149,12 +106,16 @@ one 1 NULL NULL one 1 NULL NULL three 3 3 THREE two 2 2 TWO -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -163,58 +124,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:t1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:t1 - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: v1 (type: string), a (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index c015578..6876a50 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -216,18 +216,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select * from 
small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -236,58 +240,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -326,18 +283,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 
1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -346,58 +307,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:hd - TableScan - alias: hd - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS @@ -522,7 +436,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -531,7 +445,7 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -540,6 +454,10 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -548,96 +466,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-8 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - 
Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 3320 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 3652 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 4017 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(_col0) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index 98c65d4..b448ccd 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only select count(*), 
sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,6 +244,10 @@ left outer join small_alltypesorc_a hd on hd.cbigint = c.cbigint ) t1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -252,96 +256,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-8 Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4182 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 22 Data size: 4600 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 24 Data size: 5060 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 - Fetch Operator - limit: 
-1 - Processor Tree: - ListSink PREHOOK: query: -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index a6d5c59..625eb98 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -244,104 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value 
expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -367,7 +270,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 20 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -376,7 +279,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -385,104 +288,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring2 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - 
condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 @@ -508,7 +314,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a #### A masked pattern was here #### 28 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -517,7 +323,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -526,104 +332,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cstring2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 
_col0 (type: int), _col1 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: bigint), _col3 (type: string) - 1 _col0 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.cstring1 diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index b6def86..d24812e 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -246,79 +246,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY 
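With ONLY FORMATTED the whole explain collapses to one JSON object, as in the replacement lines above. Pretty-printed for readability (same content as the single-line JSON emitted for the vector_outer_join3 tests):

    {
      "PLAN VECTORIZATION": {
        "enabled": true,
        "enabledConditionsMet": ["hive.vectorized.execution.enabled IS true"]
      },
      "STAGE DEPENDENCIES": {
        "Stage-8": {"ROOT STAGE": "TRUE"},
        "Stage-3": {"DEPENDENT STAGES": "Stage-8"},
        "Stage-0": {"DEPENDENT STAGES": "Stage-3"}
      },
      "STAGE PLANS": {
        "Stage-8": {"Map Reduce Local Work": {}},
        "Stage-3": {"Map Reduce": {}},
        "Stage-0": {}
      }
    }

The stage bodies are empty objects because ONLY strips everything except vectorization sections, and at this patch's default level these MR stages report none.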
-STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select * @@ -391,79 +331,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 
false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:hd - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select c.ctinyint @@ -892,7 +772,7 @@ NULL NULL NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -901,7 +781,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -910,104 +790,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: 
Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:cd - Fetch Operator - limit: -1 - $hdt$_2:hd - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:cd - TableScan - alias: cd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - $hdt$_2:hd - TableScan - alias: hd - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: -- SORT_QUERY_RESULTS select count(*) from (select c.ctinyint diff --git ql/src/test/results/clientpositive/vector_outer_join5.q.out ql/src/test/results/clientpositive/vector_outer_join5.q.out index 4255a94..f30936d 100644 --- ql/src/test/results/clientpositive/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/vector_outer_join5.q.out @@ -66,95 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: 
default@small_table #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st on s.ctinyint = st.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:st - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:st - TableScan - alias: st - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -174,101 +100,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 
PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -288,101 +134,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6058 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: 
query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((UDFToInteger(_col0) pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -402,101 +168,21 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6248 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: 
explain vectorization only formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm on s.ctinyint = sm.ctinyint and s.ctinyint < 100 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 100)} - 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -516,7 +202,7 @@ POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 #### A masked pattern was here #### 6876 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -525,7 +211,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from 
sorted_mod_4 s left outer join small_table sm @@ -534,104 +220,7 @@ left outer join sorted_mod_4 s2 on s2.ctinyint = s.ctinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - $hdt$_2:s2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - $hdt$_2:s2 - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{}},"Stage-3":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ 
-717,95 +306,21 @@ POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st on s.cmodtinyint = st.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:st - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:st - TableScan - alias: st - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -825,101 +340,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 39112 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select 
s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col1 = 2)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -939,101 +374,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 11171 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join 
small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {((_col0 pmod 4) = _col1)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1053,101 +408,21 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 14371 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join 
small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter predicates: - 0 {(_col0 < 3)} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{}},"Stage-2":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -1167,7 +442,7 @@ POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 #### A masked pattern was here #### 17792 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1176,7 +451,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 PREHOOK: 
type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -1185,104 +460,7 @@ left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint ) t1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:sm - Fetch Operator - limit: -1 - $hdt$_2:s2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:sm - TableScan - alias: sm - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - $hdt$_2:s2 - TableScan - alias: s2 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cmodtinyint (type: int), cmodint (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 UDFToLong(_col1) (type: bigint) - 1 (_col0 pmod 8) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{}},"Stage-3":{"Map 
Reduce":{}},"Stage-0":{}}} PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm diff --git ql/src/test/results/clientpositive/vector_outer_join6.q.out ql/src/test/results/clientpositive/vector_outer_join6.q.out index 8c09716..c0fcdc9 100644 --- ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -126,106 +126,15 @@ POSTHOOK: Output: default@TJOIN4 POSTHOOK: Lineage: tjoin4.c1 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin4.c2 SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin4.rnum SIMPLE [(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:tjoin2 - Fetch Operator - limit: -1 - $hdt$_1:tjoin3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:tjoin2 - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - $hdt$_1:tjoin3 - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col3 - 
Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{}},"Stage-5":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -244,102 +153,15 @@ POSTHOOK: Input: default@tjoin3 0 3 0 1 NULL NULL 2 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:tjoin2 - Fetch Operator - limit: -1 - $hdt$_1:tjoin3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:tjoin2 - TableScan - alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - $hdt$_1:tjoin3 - TableScan - alias: tjoin3 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: rnum (type: int), c1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Map Join 
Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{}},"Stage-5":{"Map Reduce":{}},"Stage-0":{}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out index b224da8..fee44fb 100644 --- ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -76,12 +76,16 @@ POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(invent POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -96,17 +100,48 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -195,12 +230,16 @@ POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(in POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -215,17 +254,48 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -316,12 +386,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2a POSTHOOK: Input: default@inventory_part_2a@par=2 POSTHOOK: Output: default@inventory_part_2a@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2a POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -336,17 +410,48 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -418,12 +523,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_2b POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_2b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -438,17 +547,48 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -526,12 +666,16 @@ POSTHOOK: type: ALTERTABLE_RENAMECOL POSTHOOK: Input: default@inventory_part_3 POSTHOOK: Input: default@inventory_part_3@par=2 POSTHOOK: Output: default@inventory_part_3@par=2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sum(inv_quantity_on_hand) from inventory_part_3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ 
-546,17 +690,48 @@ STAGE PLANS: Select Operator expressions: inv_quantity_on_hand (type: int) outputColumnNames: inv_quantity_on_hand + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:int] + vectorized: true Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(inv_quantity_on_hand) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out index 09dd873..0b735de 100644 --- ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out @@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -277,14 +281,38 @@ STAGE PLANS: Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], IdentityExpression[2:date], IdentityExpression[3:timestamp], IdentityExpression[4:float], IdentityExpression[5:int] + vectorized: true Statistics: Num rows: 137 Data size: 39456 
Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int), _col2 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) @@ -310,6 +338,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) @@ -365,12 +401,16 @@ Chicago New York 2010-10-24 2010-10-24 07:00:00 113.0 897 Chicago New York 2010-10-25 2010-10-25 07:00:00 -1.0 897 Chicago New York 2010-10-26 2010-10-26 07:00:00 0.0 897 Chicago New York 2010-10-27 2010-10-27 07:00:00 -11.0 897 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -385,9 +425,23 @@ STAGE PLANS: Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:date] + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[2:date] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -396,9 +450,27 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -819,12 +891,16 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_date@fl_date=2010-10-31 2010-10-29 12 2010-10-30 11 2010-10-31 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1011,12 +1087,16 @@ Chicago New York 2010-10-31 07:00:00 -4.0 1531 2010-10-31 Chicago New York 2010-10-31 07:00:00 -22.0 1610 2010-10-31 Chicago New York 2010-10-31 07:00:00 -15.0 3198 2010-10-31 Washington New York 2010-10-31 07:00:00 -18.0 7282 2010-10-31 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1032,14 +1112,38 @@ STAGE PLANS: Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_time (type: timestamp), arr_delay (type: float), fl_num (type: int), fl_date (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], IdentityExpression[2:timestamp], IdentityExpression[3:float], IdentityExpression[4:int], 
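-- Each Reduce Vectorization block in these plans reports enabled: false with
-- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false; that is,
-- reduce-side vectorization is gated on Tez or Spark even though
-- hive.vectorized.execution.reduce.enabled is already true in these runs. A hedged
-- sketch of the two settings the condition names (the engine switch is exactly what
-- these MR-based golden files do not exercise):
SET hive.vectorized.execution.reduce.enabled=true;
SET hive.execution.engine=tez;
EXPLAIN VECTORIZATION DETAIL
SELECT fl_date, COUNT(*) FROM flights_tiny_orc GROUP BY fl_date;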
IdentityExpression[5:date] + vectorized: true Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) @@ -1065,6 +1169,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 5775 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) @@ -1144,12 +1256,16 @@ Chicago New York 2010-10-24 07:00:00 113.0 897 2010-10-24 Chicago New York 2010-10-25 07:00:00 -1.0 897 2010-10-25 Chicago New York 2010-10-26 07:00:00 0.0 897 2010-10-26 Chicago New York 2010-10-27 07:00:00 -11.0 897 2010-10-27 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1164,9 +1280,23 @@ STAGE PLANS: Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:date] + 
vectorized: true Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[5:date] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: fl_date (type: date) mode: hash outputColumnNames: _col0, _col1 @@ -1175,9 +1305,27 @@ STAGE PLANS: key expressions: _col0 (type: date) sort order: + Map-reduce partition columns: _col0 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 31776 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1622,12 +1770,16 @@ POSTHOOK: Input: default@flights_tiny_orc_partitioned_timestamp@fl_time=2010-10- 2010-10-29 07:00:00 12 2010-10-30 07:00:00 11 2010-10-31 07:00:00 8 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1814,12 +1966,16 @@ Chicago New York 2010-10-31 -4.0 1531 2010-10-31 07:00:00 Chicago New York 2010-10-31 -22.0 1610 2010-10-31 07:00:00 Chicago New York 2010-10-31 -15.0 3198 2010-10-31 07:00:00 Washington New York 2010-10-31 -18.0 7282 2010-10-31 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1835,14 +1991,38 @@ STAGE PLANS: Select Operator expressions: origin_city_name (type: string), dest_city_name (type: string), fl_date (type: date), arr_delay (type: float), fl_num (type: int), fl_time (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true 
+ selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], IdentityExpression[2:date], IdentityExpression[3:float], IdentityExpression[4:int], IdentityExpression[5:timestamp] + vectorized: true Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) @@ -1868,6 +2048,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 6175 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) @@ -1947,12 +2135,16 @@ Chicago New York 2010-10-24 113.0 897 2010-10-24 07:00:00 Chicago New York 2010-10-25 -1.0 897 2010-10-25 07:00:00 Chicago New York 2010-10-26 0.0 897 2010-10-26 07:00:00 Chicago New York 2010-10-27 -11.0 897 2010-10-27 07:00:00 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1967,9 +2159,23 @@ STAGE PLANS: Select Operator expressions: fl_time (type: timestamp) outputColumnNames: 
fl_time + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:timestamp] + vectorized: true Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[5:timestamp] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: fl_time (type: timestamp) mode: hash outputColumnNames: _col0, _col1 @@ -1978,9 +2184,27 @@ STAGE PLANS: key expressions: _col0 (type: timestamp) sort order: + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 137 Data size: 33968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vector_reduce1.q.out ql/src/test/results/clientpositive/vector_reduce1.q.out index fe69ebd..348e074 100644 --- ql/src/test/results/clientpositive/vector_reduce1.q.out +++ ql/src/test/results/clientpositive/vector_reduce1.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select b from vectortab2korc order by b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select b from vectortab2korc order by b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,12 +129,36 @@ STAGE PLANS: Select Operator expressions: b (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 
(type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) diff --git ql/src/test/results/clientpositive/vector_reduce2.q.out ql/src/test/results/clientpositive/vector_reduce2.q.out index 22d95ad..39b1baf 100644 --- ql/src/test/results/clientpositive/vector_reduce2.q.out +++ ql/src/test/results/clientpositive/vector_reduce2.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s, i, s2 from vectortab2korc order by s, i, s2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s, i, s2 from vectortab2korc order by s, i, s2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,12 +129,36 @@ STAGE PLANS: Select Operator expressions: s (type: string), i (type: int), s2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string], IdentityExpression[2:int], IdentityExpression[9:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 
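-- The vector_reduce1/2/3 plans around this point share one pattern: the ReduceSink is
-- vectorized but non-native, with nativeConditionsNotMet listing the MR engine and
-- Uniform Hash IS false, while the serde, no-buckets, no-TopN, and no-DISTINCT
-- conditions are all met. The ORDER BY shapes the three tests cover (column names
-- taken from the vectortab2korc plans in this diff):
EXPLAIN VECTORIZATION DETAIL SELECT b FROM vectortab2korc ORDER BY b;
EXPLAIN VECTORIZATION DETAIL SELECT s, i, s2 FROM vectortab2korc ORDER BY s, i, s2;
EXPLAIN VECTORIZATION DETAIL SELECT s FROM vectortab2korc ORDER BY s;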
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) diff --git ql/src/test/results/clientpositive/vector_reduce3.q.out ql/src/test/results/clientpositive/vector_reduce3.q.out index c7dffd8..b7b1fdc 100644 --- ql/src/test/results/clientpositive/vector_reduce3.q.out +++ ql/src/test/results/clientpositive/vector_reduce3.q.out @@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select s from vectortab2korc order by s PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select s from vectortab2korc order by s POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,12 +129,36 @@ STAGE PLANS: Select Operator expressions: s (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:string] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index 
5d7000a..09b18db 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -14,20 +14,24 @@ POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, cdouble, cdecimal1, cdecimal2, min(cdecimal1) as min_decimal1 FROM decimal_test WHERE cdecimal1 is not null and cdecimal2 is not null GROUP BY cint, cdouble, cdecimal1, cdecimal2 ORDER BY cint, cdouble, cdecimal1, cdecimal2 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -41,10 +45,24 @@ STAGE PLANS: alias: decimal_test Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[4:long]) SelectColumnIsNotNull[-1:boolean](CastDecimalToBoolean[4:long])) + vectorized: true predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean) Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cdecimal1) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(IdentityExpression[2:decimal(20,10)]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:int], IdentityExpression[1:double], IdentityExpression[2:decimal(20,10)], IdentityExpression[3:decimal(23,14)] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -53,10 +71,28 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 6102 
Data size: 1440072 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -81,6 +117,14 @@ STAGE PLANS: Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(20,10)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10)) diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index 39a64d8..2ace6ed 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -97,16 +97,20 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT s AS `string`, +PREHOOK: query: EXPLAIN VECTORIZATION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT s AS `string`, +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, CONCAT(CONCAT('|',RTRIM(CONCAT(CONCAT(' ',s),' '))),'|') AS `none_z_rtrim_str` FROM over1korc LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -114,31 +118,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 
20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 20 - Processor Tree: - ListSink PREHOOK: query: SELECT s AS `string`, CONCAT(CONCAT(' ',s),' ') AS `none_padded_str`, @@ -281,20 +272,24 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -310,8 +305,21 @@ STAGE PLANS: Select Operator expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringGroupConcatColCol[19:String_Family](StringGroupColConcatStringScalar[17:String_Family](StringScalarConcatStringGroupCol[18:String_Family](CastLongToString[17:String](CastDoubleToLong[13:long](DoubleColAddDoubleScalar[15:double](DoubleColDivideDoubleScalar[16:double](CastLongToDouble[15:double](LongColSubtractLongScalar[14:long](VectorUDFMonthDate[13:long])))))))) CastLongToString[18:String](VectorUDFYearDate[13:long])) + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[19:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -320,9 +328,27 @@ STAGE PLANS: key 
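-- vector_string_concat.q.out exercises the bare EXPLAIN VECTORIZATION form: the hunk
-- above removes the Map Operator Tree and Fetch Operator detail from the golden output
-- and keeps only the Map Vectorization summary, so the non-DETAIL level appears to
-- print vectorization summaries in place of the full operator tree. That reading is
-- inferred from this hunk alone, not from the patch's grammar:
EXPLAIN VECTORIZATION
SELECT s, CONCAT(CONCAT(' ', s), ' ') FROM over1korc LIMIT 20;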
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -345,6 +371,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/vector_string_decimal.q.out ql/src/test/results/clientpositive/vector_string_decimal.q.out index e0a3563..eeae0db 100644 --- ql/src/test/results/clientpositive/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/vector_string_decimal.q.out @@ -40,12 +40,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_decimal POSTHOOK: Lineage: orc_decimal.id SIMPLE [(staging)staging.FieldSchema(name:id, type:decimal(18,0), comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from orc_decimal where id in ('100000000', '200000000') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from orc_decimal where id in ('100000000', '200000000') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -71,6 +75,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. 
Column type is decimal(18,0) but the common type is string + vectorized: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_struct_in.q.out ql/src/test/results/clientpositive/vector_struct_in.q.out index 3971f55..e0fb878 100644 --- ql/src/test/results/clientpositive/vector_struct_in.q.out +++ ql/src/test/results/clientpositive/vector_struct_in.q.out @@ -22,7 +22,7 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -36,7 +36,7 @@ struct('nine','1'), struct('ten','1') ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_1 where struct(`id`, `lineid`) IN ( struct('two','3'), @@ -50,6 +50,10 @@ struct('nine','1'), struct('ten','1') ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,20 +66,45 @@ STAGE PLANS: alias: test_1 Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterStructColumnInList[-1:boolean] + vectorized: true predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), lineid (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string] + vectorized: true Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -115,7 +144,7 @@ POSTHOOK: Input: default@test_1 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ 
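-- vector_string_decimal.q.out shows a whole-map fallback: notVectorizedReason reports
-- that IN() over a decimal(18,0) column with string literals would require casting the
-- column, which the vectorizer does not support. A hedged workaround sketch, not
-- verified against this patch: decimal literals (Hive's BD suffix) keep the IN list in
-- the column's own type family, so no column cast should be needed.
EXPLAIN VECTORIZATION DETAIL
SELECT * FROM orc_decimal WHERE id IN (100000000BD, 200000000BD);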
-129,7 +158,7 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two','3'), @@ -143,6 +172,10 @@ struct('nine','1'), struct('ten','1') ) as b from test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -157,15 +190,34 @@ STAGE PLANS: Select Operator expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:string], StructColumnInList[3:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -225,7 +277,7 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -239,7 +291,7 @@ struct(9,1), struct(10,1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_2 where struct(`id`, `lineid`) IN ( struct(2,3), @@ -253,6 +305,10 @@ struct(9,1), struct(10,1) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -265,20 +321,45 @@ STAGE PLANS: alias: test_2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterStructColumnInList[-1:boolean] + vectorized: true predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), 
const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), lineid (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int] + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -318,7 +399,7 @@ POSTHOOK: Input: default@test_2 #### A masked pattern was here #### 1 1 7 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -332,7 +413,7 @@ struct(9,1), struct(10,1) ) as b from test_2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct(2,3), @@ -346,6 +427,10 @@ struct(9,1), struct(10,1) ) as b from test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -360,15 +445,34 @@ STAGE PLANS: Select Operator expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:int], StructColumnInList[3:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -428,7 +532,7 @@ POSTHOOK: 
Input: default@values__tmp__table__3 POSTHOOK: Output: default@test_3 POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -442,7 +546,7 @@ struct('nine',1), struct('ten',1) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_3 where struct(`id`, `lineid`) IN ( struct('two',3), @@ -456,6 +560,10 @@ struct('nine',1), struct('ten',1) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -468,20 +576,45 @@ STAGE PLANS: alias: test_3 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterStructColumnInList[-1:boolean] + vectorized: true predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), lineid (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:int] + vectorized: true Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -521,7 +654,7 @@ POSTHOOK: Input: default@test_3 #### A masked pattern was here #### one 1 seven 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -535,7 +668,7 @@ struct('nine',1), struct('ten',1) ) as b from test_3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `id`, `lineid`, struct(`id`, `lineid`) IN ( struct('two',3), @@ -549,6 +682,10 @@ struct('nine',1), struct('ten',1) ) as b from test_3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -563,15 +700,34 @@ STAGE PLANS: Select Operator expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], IdentityExpression[1:int], StructColumnInList[3:boolean] + vectorized: true Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -632,7 +788,7 @@ POSTHOOK: Output: default@test_4 POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -647,7 +803,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -662,6 +818,10 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -674,20 +834,45 @@ STAGE PLANS: alias: test_4 Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterStructColumnInList[-1:boolean] + vectorized: true predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint], IdentityExpression[1:string], IdentityExpression[2:double] + vectorized: true Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -728,7 +913,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_4 #### A masked pattern was here #### 1 a 0.5 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -743,7 +928,7 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) IN ( struct(1L, "a", 1.5D), @@ -758,6 +943,10 @@ struct(1L, "a", 0.5D), struct(3L, "b", 1.5D) ) as b from test_4 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -772,15 +961,34 @@ STAGE PLANS: Select Operator expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:bigint], IdentityExpression[1:string], IdentityExpression[2:double], StructColumnInList[4:boolean] + vectorized: true Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index 7e5f24d..28c8139 100644 --- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -1,10 +1,14 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,15 +24,34 @@ STAGE PLANS: Select Operator expressions: 'key1' (type: string), 'value1' (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[12:string], ConstantVectorExpression[13:string] + vectorized: true Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -54,15 +77,19 @@ POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@decimal_2 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -84,9 +111,20 @@ STAGE PLANS: Select Operator expressions: 17.29 (type: decimal(18,9)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[12:decimal(18,9)] + vectorized: true Statistics: Num rows: 12288 Data size: 
1376256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -94,6 +132,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.decimal_2 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-7 Conditional Operator @@ -168,14 +214,18 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@decimal_2 POSTHOOK: Output: default@decimal_2 PREHOOK: query: -- Dummy tables HIVE-13190 -explain +explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y PREHOOK: type: QUERY POSTHOOK: query: -- Dummy tables HIVE-13190 -explain +explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -195,6 +245,13 @@ STAGE PLANS: key expressions: 1 (type: int) sort order: + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE @@ -218,6 +275,14 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -248,13 +313,17 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### _c0 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail create temporary table dual as select 1 PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail create temporary table dual as select 1 POSTHOOK: type: CREATETABLE_AS_SELECT Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -286,6 +355,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dual + Map Vectorization: + enabled: false +#### A masked pattern was 
here #### Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_udf1.q.out ql/src/test/results/clientpositive/vector_udf1.q.out index 0d8c206..ae38e6e 100644 --- ql/src/test/results/clientpositive/vector_udf1.q.out +++ ql/src/test/results/clientpositive/vector_udf1.q.out @@ -25,7 +25,7 @@ POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- UDFs with varchar support -explain +explain vectorization detail select concat(c1, c2), concat(c3, c4), @@ -33,13 +33,17 @@ select from varchar_udf_1 limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- UDFs with varchar support -explain +explain vectorization detail select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -54,18 +58,42 @@ STAGE PLANS: Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = UDFToString(concat(c3, c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringGroupConcatColCol[4:String_Family], StringGroupConcatColCol[5:String_Family], StringGroupColEqualStringGroupColumn[9:boolean](StringGroupConcatColCol[6:String_Family] CastStringGroupToString[8:String](StringGroupConcatColCol[7:String_Family])) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -90,20 +118,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238val_238 238val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -118,18 +150,42 @@ STAGE PLANS: Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = UDFToString(upper(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringUpper[4:String], StringUpper[5:String], StringGroupColEqualStringGroupColumn[9:boolean](StringUpper[6:String] CastStringGroupToString[8:String](StringUpper[7:String])) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -154,20 +210,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### VAL_238 VAL_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -182,18 +242,42 @@ STAGE PLANS: Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = UDFToString(lower(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringLower[4:String], StringLower[5:String], StringGroupColEqualStringGroupColumn[9:boolean](StringLower[6:String] CastStringGroupToString[8:String](StringLower[7:String])) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -219,7 +303,7 @@ POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: -- Scalar UDFs -explain +explain vectorization detail select ascii(c2), ascii(c4), @@ -227,13 +311,17 @@ select from varchar_udf_1 limit 1 PREHOOK: type: QUERY POSTHOOK: query: -- Scalar UDFs -explain +explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -259,6 +347,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFBridge ==> ascii (Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -283,20 +377,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 118 118 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -322,6 +420,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFConcatWS(Const string |, Column[c1], Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -346,20 +450,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238|val_238 238|val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 PREHOOK: type: QUERY 
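Reviewer note: the two notVectorizedReason entries above (GenericUDFBridge ==> ascii and GenericUDFConcatWS) are the most useful part of this change: when a SELECT expression has no vectorized counterpart, the plan now says why the whole map task fell back to row mode. Below is a minimal sketch of that kind of check, assuming a hypothetical supported-UDF registry; the class and method names are illustrative, not Hive's actual Vectorizer API.

import java.util.Optional;
import java.util.Set;

// Illustrative sketch only, not Hive's actual Vectorizer. It mirrors the
// explain output above: when a UDF has no vectorized counterpart, a reason
// string is produced instead of a bare boolean, so EXPLAIN can print it.
public class VectorizationValidatorSketch {

    // Hypothetical registry of UDF names that have vectorized implementations.
    private static final Set<String> SUPPORTED_UDFS =
        Set.of("upper", "lower", "length", "ltrim", "rtrim", "trim", "substr");

    // Empty result means "vectorizable"; otherwise the notVectorizedReason text.
    static Optional<String> validateSelectExpression(String udfName, String exprText) {
        if (!SUPPORTED_UDFS.contains(udfName)) {
            return Optional.of("Select expression for SELECT operator: UDF "
                + udfName + " (" + exprText + ") not supported");
        }
        return Optional.empty();
    }

    public static void main(String[] args) {
        System.out.println(validateSelectExpression("ascii", "Column[c2]")
            .orElse("vectorized: true"));
        System.out.println(validateSelectExpression("upper", "Column[c2]")
            .orElse("vectorized: true"));
    }
}

Carrying the reason as a string rather than a boolean is what lets the detail output print notVectorizedReason verbatim.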
-POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -385,6 +493,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFDecode(GenericUDFEncode(Column[c2], Const string US-ASCII), Const string US-ASCII) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -409,20 +523,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -448,6 +566,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFInstr(Column[c2], Const string _) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -472,20 +596,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 4 4 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -500,18 +628,42 @@ STAGE PLANS: Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringLength[4:Long], StringLength[5:Long], LongColEqualLongColumn[8:long](StringLength[6:Long] StringLength[7:Long]) + vectorized: true 
Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -536,20 +688,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 7 7 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -564,18 +720,42 @@ STAGE PLANS: Select Operator expressions: 5 (type: int), 5 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: ConstantVectorExpression[4:long], ConstantVectorExpression[5:long], ConstantVectorExpression[6:long] + vectorized: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true Stage: Stage-0 Fetch Operator @@ -600,20 +780,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 5 5 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -639,6 +823,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFLpad(Column[c2], Const int 15, Const string ) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -663,20 +853,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -691,18 +885,42 @@ STAGE PLANS: Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringLTrim[4:String], StringLTrim[5:String], StringGroupColEqualStringGroupColumn[8:boolean](StringLTrim[6:String] StringLTrim[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -727,20 +945,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -755,18 +977,42 @@ STAGE PLANS: Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorUDFAdaptor[4:Long], VectorUDFAdaptor[5:Long], LongColEqualLongColumn[8:long](VectorUDFAdaptor[6:Long] VectorUDFAdaptor[7:Long]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -791,20 +1037,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -819,18 +1069,42 @@ STAGE PLANS: Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 
'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorUDFAdaptor[4:String], VectorUDFAdaptor[5:String], StringGroupColEqualStringGroupColumn[8:boolean](VectorUDFAdaptor[6:String] VectorUDFAdaptor[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -855,20 +1129,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -883,18 +1161,42 @@ STAGE PLANS: Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: VectorUDFAdaptor[4:String], VectorUDFAdaptor[5:String], StringGroupColEqualStringGroupColumn[8:boolean](VectorUDFAdaptor[6:String] VectorUDFAdaptor[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: 
COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -919,20 +1221,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### replaced_238 replaced_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -958,6 +1264,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFBridge ==> reverse (Column[c2]) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -982,20 +1294,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 832_lav 832_lav true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1021,6 +1337,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFRpad(Column[c2], Const int 15, Const string ) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1045,20 +1367,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization 
detail select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1073,18 +1399,42 @@ STAGE PLANS: Select Operator expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringRTrim[4:String], StringRTrim[5:String], StringGroupColEqualStringGroupColumn[8:boolean](StringRTrim[6:String] StringRTrim[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1109,18 +1459,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1146,6 +1500,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type array<array<string>> of GenericUDFSentences(Const string See spot run. See jane run.) not supported + vectorized: false
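Reviewer note: this golden file now distinguishes three tiers cleanly: natively vectorized expressions (StringRTrim above), row-mode UDFs driven through VectorUDFAdaptor inside an otherwise vectorized pipeline (the regexp cases earlier, flagged usesVectorUDFAdaptor: true), and UDFs such as sentences whose array<array<string>> result has no column-vector representation at all. Here is a simplified, hypothetical adaptor loop, not Hive's actual VectorUDFAdaptor, showing why the middle tier still works but stays slower than native expressions.

import java.util.function.UnaryOperator;

// Simplified, hypothetical adaptor loop: a row-mode string function is
// applied once per row of a batch and written into an output column.
// Real column vectors also track null flags and isRepeating; omitted here.
public class RowModeAdaptorSketch {

    static void evaluate(String[] inputColumn, String[] outputColumn,
                         int size, UnaryOperator<String> rowModeUdf) {
        for (int i = 0; i < size; i++) {
            // One virtual call per row: correct, but without the tight
            // primitive loops that natively vectorized expressions get.
            outputColumn[i] = rowModeUdf.apply(inputColumn[i]);
        }
    }

    public static void main(String[] args) {
        String[] in = {"val_238", "val_86"};
        String[] out = new String[in.length];
        evaluate(in, out, in.length, s -> s.replaceAll("val", "replaced"));
        System.out.println(out[0] + " " + out[1]); // replaced_238 replaced_86
    }
}

Per-row dispatch preserves correctness at some cost; complex return types cannot even do this much, since there is no output vector to write the result into, hence the vectorized: false above.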
Stage: Stage-0 Fetch Operator @@ -1168,18 +1528,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### [["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select split(c2, '_'), split(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select split(c2, '_'), split(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1205,6 +1569,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type array<string> of GenericUDFSplit(Column[c2], Const string _) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1227,18 +1597,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### ["val","238"] ["val","238"] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1264,6 +1638,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map<string,string> of GenericUDFStringToMap(Const string a:1,b:2,c:3, Const string ,, Const string :) not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1285,21 +1665,25 @@ from varchar_udf_1 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} -PREHOOK: query: explain +{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"} +PREHOOK: query: explain vectorization detail select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1314,18 +1698,42 @@ STAGE PLANS: Select Operator expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringSubstrColStartLen[4:string], StringSubstrColStartLen[5:string], StringGroupColEqualStringGroupColumn[8:boolean](StringSubstrColStartLen[6:string] StringSubstrColStartLen[7:string]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1350,20 +1758,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val val true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1378,18 +1790,42 @@ STAGE PLANS: Select Operator expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: StringTrim[4:String], StringTrim[5:String], StringGroupColEqualStringGroupColumn[8:boolean](StringTrim[6:String] StringTrim[7:String]) + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -1415,19 +1851,23 @@ POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: -- Aggregate Functions -explain +explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: -- Aggregate Functions -explain +explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1452,6 +1892,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -1487,18 +1937,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select min(c2), min(c4) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select min(c2), min(c4) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1513,17 +1967,48 @@ STAGE PLANS: Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[3:varchar(20)] + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(c2), min(c4) + Group By Vectorization: + aggregators: VectorUDAFMinString(IdentityExpression[1:string]), VectorUDAFMinString(IdentityExpression[3:varchar(20)]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true 
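Reviewer note: the Group By Vectorization block above is a good showcase for the detail level: min(c2) and min(c4) map to VectorUDAFMinString aggregators, the hash-mode group-by itself stays non-native (Supported IS false), and the reduce side cannot vectorize on MR. Below is a toy batch-at-a-time MIN(string) in the same spirit, assuming unsigned byte-wise comparison of UTF-8 strings; this is a sketch, not the generated VectorUDAFMinMaxString template code.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

// Toy batch-at-a-time MIN(string): values are compared as unsigned bytes,
// which matches binary sort order for UTF-8 encoded text.
public class MinStringSketch {

    private byte[] min; // null until the first non-null value is seen

    void aggregateBatch(byte[][] column, boolean[] isNull, int size) {
        for (int i = 0; i < size; i++) {
            if (isNull[i]) {
                continue; // SQL MIN ignores NULLs
            }
            if (min == null || compareUnsigned(column[i], min) < 0) {
                min = Arrays.copyOf(column[i], column[i].length);
            }
        }
    }

    private static int compareUnsigned(byte[] a, byte[] b) {
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
            int cmp = (a[i] & 0xff) - (b[i] & 0xff);
            if (cmp != 0) {
                return cmp;
            }
        }
        return a.length - b.length;
    }

    public static void main(String[] args) {
        MinStringSketch agg = new MinStringSketch();
        byte[][] batch = {
            "val_86".getBytes(StandardCharsets.UTF_8),
            "val_238".getBytes(StandardCharsets.UTF_8)
        };
        agg.aggregateBatch(batch, new boolean[]{false, false}, batch.length);
        System.out.println(new String(agg.min, StandardCharsets.UTF_8)); // val_238
    }
}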
mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), min(VALUE._col1) @@ -1559,18 +2044,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select max(c2), max(c4) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select max(c2), max(c4) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1585,17 +2074,48 @@ STAGE PLANS: Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:string], IdentityExpression[3:varchar(20)] + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(c2), max(c4) + Group By Vectorization: + aggregators: VectorUDAFMaxString(IdentityExpression[1:string]), VectorUDAFMaxString(IdentityExpression[3:varchar(20)]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), max(VALUE._col1) diff --git ql/src/test/results/clientpositive/vector_udf2.q.out ql/src/test/results/clientpositive/vector_udf2.q.out index 12c083f..d4178a0 100644 --- ql/src/test/results/clientpositive/vector_udf2.q.out +++ ql/src/test/results/clientpositive/vector_udf2.q.out @@ -24,7 +24,7 @@ POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type: POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c1 LIKE '%38%', c2 LIKE 'val_%', @@ -34,7 +34,7 @@ select c3 LIKE '%x38' from varchar_udf_2 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c1 LIKE '%38%', c2 LIKE 'val_%', @@ -44,6 +44,10 @@ select c3 LIKE '%x38' from varchar_udf_2 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -58,18 +62,42 @@ STAGE PLANS: Select Operator expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: SelectStringColLikeStringScalar[4:String_Family], SelectStringColLikeStringScalar[5:String_Family], SelectStringColLikeStringScalar[6:String_Family], SelectStringColLikeStringScalar[7:String_Family], SelectStringColLikeStringScalar[8:String_Family], SelectStringColLikeStringScalar[9:String_Family] + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_udf3.q.out ql/src/test/results/clientpositive/vector_udf3.q.out index 7c6a90a..084e26d 100644 --- ql/src/test/results/clientpositive/vector_udf3.q.out +++ ql/src/test/results/clientpositive/vector_udf3.q.out @@ -4,10 +4,14 @@ PREHOOK: Output: rot13 POSTHOOK: query: CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Output: rot13 -PREHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc +PREHOOK: query: EXPLAIN VECTORIZATION SELECT rot13(cstring1) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT rot13(cstring1) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -15,28 +19,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: Rot13(cstring1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out index f7c9cd0..eb94687 100644 --- ql/src/test/results/clientpositive/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/vector_varchar_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi v POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -147,9 +151,20 @@ STAGE PLANS: Select Operator expressions: CAST( t AS varchar(10)) 
(type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToVarChar[13:VarChar], CastLongToVarChar[14:VarChar], CastLongToVarChar[15:VarChar], CastLongToVarChar[16:VarChar], VectorUDFAdaptor[17:varchar(20)], VectorUDFAdaptor[18:varchar(20)], CastStringGroupToVarChar[19:VarChar] + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out index a6c4de9..765ea2d 100644 --- ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out +++ ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out @@ -125,11 +125,15 @@ POSTHOOK: Output: default@varchar_join1_str_orc POSTHOOK: Lineage: varchar_join1_str_orc.c1 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: varchar_join1_str_orc.c2 SIMPLE [(varchar_join1_str)varchar_join1_str.FieldSchema(name:c2, type:string, comment:null), ] PREHOOK: query: -- Join varchar with same length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with same length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -140,7 +144,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:a @@ -161,51 +164,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 
273 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: varchar(10)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: varchar(10)) - 1 _col1 (type: varchar(10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(10)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from varchar_join1_vc1_orc a join varchar_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY @@ -219,11 +195,15 @@ POSTHOOK: Input: default@varchar_join1_vc1_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join varchar with different length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with different length varchar -explain select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -234,7 +214,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:a @@ -255,51 +234,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 
(type: int), c2 (type: varchar(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: varchar(20)) - 1 _col1 (type: varchar(20)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: varchar(20)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: varchar(20)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from varchar_join1_vc1_orc a join varchar_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY @@ -315,11 +267,15 @@ POSTHOOK: Input: default@varchar_join1_vc2_orc 2 abc 2 abc 3 abc 3 abc PREHOOK: query: -- Join varchar with string -explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY POSTHOOK: query: -- Join varchar with string -explain select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +explain vectorization select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -330,7 +286,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:b @@ -351,51 +306,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: varchar(10)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 273 Basic 
stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToString(_col1) (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select * from varchar_join1_vc1_orc a join varchar_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out index 49d8b34..6526c67 100644 --- ql/src/test/results/clientpositive/vector_varchar_simple.q.out +++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,42 +66,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: varchar_2 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: varchar(10)), value (type: varchar(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: varchar(10)) - sort order: + - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: varchar(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - 
expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -139,16 +123,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization select key, value from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization select key, value from varchar_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -156,42 +144,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: varchar_2 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: varchar(10)), value (type: varchar(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: varchar(10)) - sort order: - - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: varchar(20)) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: 
false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 5 - Processor Tree: - ListSink PREHOOK: query: -- should match the query from src select key, value @@ -236,12 +204,16 @@ create table varchar_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail insert into table varchar_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail insert into table varchar_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -257,16 +229,45 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index 5c981fb..0602da3 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -20,12 +20,16 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@count_case_groupby POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization 
detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,9 +44,23 @@ STAGE PLANS: Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:string], VectorUDFAdaptor[3:Long](NotCol[2:boolean]) + vectorized: true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[3:int]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -51,9 +69,27 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorization_0.q.out ql/src/test/results/clientpositive/vectorization_0.q.out index ac33721..42d6ac8 100644 --- ql/src/test/results/clientpositive/vectorization_0.q.out +++ ql/src/test/results/clientpositive/vectorization_0.q.out @@ -1,7 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -12,7 +12,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -35,17 +39,48 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[0:tinyint]), VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFCount(IdentityExpression[0:tinyint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -68,6 +103,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -106,16 +149,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: 
EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -131,17 +178,48 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -163,6 +241,14 @@ STAGE PLANS: key expressions: _col0 (type: bigint) sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) @@ -195,7 +281,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39856 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -208,7 +294,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(ctinyint) as c1, variance(ctinyint), @@ -221,6 +307,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -229,64 +319,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(ctinyint) as c1, @@ -317,7 +376,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -325,7 +384,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -333,6 +392,10 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -348,17 +411,48 @@ STAGE PLANS: Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(IdentityExpression[3:bigint]), VectorUDAFMaxLong(IdentityExpression[3:bigint]), VectorUDAFCount(IdentityExpression[3:bigint]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -381,6 +475,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -419,16 +521,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -444,17 +550,48 @@ STAGE PLANS: Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cbigint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[3:bigint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -476,6 +613,14 @@ STAGE PLANS: key expressions: _col0 (type: bigint) sort 
order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) @@ -508,7 +653,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1698460028409 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -521,7 +666,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cbigint) as c1, variance(cbigint), @@ -534,6 +679,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -542,64 +691,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output 
Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(cbigint) as c1, @@ -630,7 +748,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -638,7 +756,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -646,6 +764,10 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -661,17 +783,48 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(IdentityExpression[4:float]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFCount(IdentityExpression[4:float]), VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + 
native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) @@ -694,6 +847,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -732,16 +893,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -757,17 +922,48 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or 
DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -789,6 +985,14 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) @@ -821,7 +1025,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -39479.635992884636 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -834,7 +1038,7 @@ SELECT FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT avg(cfloat) as c1, variance(cfloat), @@ -847,6 +1051,10 @@ SELECT FROM alltypesorc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -855,64 +1063,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), 
stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT avg(cfloat) as c1, @@ -944,7 +1121,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. 
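A minimal illustrative sketch, assuming a Tez deployment (not part of the golden files in this patch): the recurring Reduce Vectorization entries above report enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true alongside enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, so reduce-side work stays non-vectorized in these plans only because they run on MR. Settings along the following lines would be expected to satisfy every condition named in the output; the query and the alltypesorc table are taken from the runs above, and the engine choice is an assumption rather than anything this patch configures.
-- hedged sketch: satisfy the conditions named in the explain annotations above
set hive.vectorized.execution.enabled=true;         -- PLAN VECTORIZATION: enabled
set hive.vectorized.execution.reduce.enabled=true;  -- already met in the plans above
set hive.execution.engine=tez;                      -- clears "hive.execution.engine mr IN [tez, spark] IS false"
-- the ORC map stages are already vectorized via hive.vectorized.use.vectorized.input.format;
-- the SequenceFile map stages would instead need hive.vectorized.use.vector.serde.deserialize
EXPLAIN VECTORIZATION DETAIL
SELECT SUM(cfloat) AS c1 FROM alltypesorc ORDER BY c1;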
-PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -971,7 +1148,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -998,6 +1175,10 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1010,14 +1191,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterStringColLikeStringScalar[-1:boolean] FilterDecimalScalarNotEqualDecimalColumn[-1:boolean](CastLongToDecimal[12:decimal(13,3)]) FilterDoubleColLessDoubleColumn[-1:boolean](CastLongToDouble[13:double]) FilterExprAndExpr[-1:boolean](FilterLongColGreaterEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterLongColEqualLongScalar[-1:boolean] FilterLongScalarEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]))) + vectorized: true predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[3:bigint], IdentityExpression[4:float], IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(IdentityExpression[3:bigint]), VectorUDAFStdPopLong(IdentityExpression[3:bigint]), VectorUDAFVarSampLong(IdentityExpression[3:bigint]), VectorUDAFCountStar, VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(IdentityExpression[3:bigint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE @@ -1026,6 +1227,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 
(type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out index e82e474..cc7878f 100644 --- ql/src/test/results/clientpositive/vectorization_13.q.out +++ ql/src/test/results/clientpositive/vectorization_13.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -35,7 +35,7 @@ LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -81,14 +85,35 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean] FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean] FilterLongColNotEqualLongScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(11,4)]))) + vectorized: true predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), 
stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFStdPopDouble(IdentityExpression[4:float]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -100,6 +125,18 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -127,6 +164,14 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) @@ -258,7 
+303,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0 NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -292,7 +337,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, LIMIT 40 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -325,6 +370,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -338,14 +387,35 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](FilterDoubleColLessDoubleScalar[-1:boolean] FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean] FilterLongColNotEqualLongScalar[-1:boolean]) FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDecimalColLessDecimalScalar[-1:boolean](CastLongToDecimal[13:decimal(11,4)]))) + vectorized: true predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[4:float], IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(IdentityExpression[0:tinyint]), VectorUDAFSumDouble(IdentityExpression[4:float]), VectorUDAFStdPopDouble(IdentityExpression[4:float]), VectorUDAFStdPopLong(IdentityExpression[0:tinyint]), VectorUDAFMaxDouble(IdentityExpression[4:float]), VectorUDAFMinLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[10:boolean], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], 
IdentityExpression[4:float], IdentityExpression[6:string] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(IdentityExpression[4:float]) output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(IdentityExpression[0:tinyint]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -357,6 +427,18 @@ STAGE PLANS: Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) @@ -384,6 +466,14 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index 34923d6..7a03e34 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, @@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT ctimestamp1, cfloat, cstring1, 
@@ -68,6 +68,10 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -76,75 +80,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean) - Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) - Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) - outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) - sort order: ++++ - Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cfloat, diff --git 
ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out index a2edab4..6d56e49 100644 --- ql/src/test/results/clientpositive/vectorization_15.q.out +++ ql/src/test/results/clientpositive/vectorization_15.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cboolean1, cdouble, @@ -64,6 +64,10 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -72,75 +76,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) - outputColumnNames: cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - sort order: +++++++ - Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) - keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - sort order: +++++++ - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - File Output Operator - 
compressed: false - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cboolean1, diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out index c297239..4b2cb7c 100644 --- ql/src/test/results/clientpositive/vectorization_16.q.out +++ ql/src/test/results/clientpositive/vectorization_16.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,54 +53,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(38,16)), _col4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cstring1, cdouble, diff --git ql/src/test/results/clientpositive/vectorization_17.q.out ql/src/test/results/clientpositive/vectorization_17.q.out index e01bea2..92e28c5 100644 --- ql/src/test/results/clientpositive/vectorization_17.q.out +++ ql/src/test/results/clientpositive/vectorization_17.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cfloat, cstring1, cint, @@ -50,6 +50,10 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,41 +61,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) 
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: bigint), _col0 (type: float) - sort order: ++ - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cstring1, diff --git ql/src/test/results/clientpositive/vectorization_7.q.out ql/src/test/results/clientpositive/vectorization_7.q.out index 25e7657..01604dc 100644 --- ql/src/test/results/clientpositive/vectorization_7.q.out +++ ql/src/test/results/clientpositive/vectorization_7.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -29,7 +29,7 @@ LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -56,6 +56,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -68,18 +72,48 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + 
predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColNotEqualLongScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprOrExpr[-1:boolean](FilterDoubleScalarLessDoubleColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean]))) + vectorized: true predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[6:string], LongColAddLongColumn[13:long], LongColModuloLongScalar[14:long](IdentityExpression[1:smallint]), LongColUnaryMinus[15:long], LongColUnaryMinus[16:long], LongColAddLongScalar[18:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColMultiplyLongColumn[19:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColModuloLongColumn[17:long](IdentityExpression[1:smallint]), LongColUnaryMinus[20:long], LongColModuloLongColumn[22:long](LongColUnaryMinus[21:long]) + vectorized: true Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) @@ -184,7 +218,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -212,7 +246,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, LIMIT 25 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -239,6 +273,10 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -251,18 +289,48 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColNotEqualLongScalar[-1:boolean] FilterExprOrExpr[-1:boolean](FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterLongColEqualLongColumn[-1:boolean](IdentityExpression[0:tinyint]) FilterStringColLikeStringScalar[-1:boolean]) FilterExprOrExpr[-1:boolean](FilterDoubleScalarLessDoubleColumn[-1:boolean] FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleScalarGreaterEqualDoubleColumn[-1:boolean]))) + vectorized: true predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), 
(UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean], IdentityExpression[3:bigint], IdentityExpression[1:smallint], IdentityExpression[0:tinyint], IdentityExpression[8:timestamp], IdentityExpression[6:string], LongColAddLongColumn[13:long], LongColModuloLongScalar[14:long](IdentityExpression[1:smallint]), LongColUnaryMinus[15:long], LongColUnaryMinus[16:long], LongColAddLongScalar[18:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColMultiplyLongColumn[19:long](IdentityExpression[17:long](LongColUnaryMinus[17:long])), LongColModuloLongColumn[17:long](IdentityExpression[1:smallint]), LongColUnaryMinus[20:long], LongColModuloLongColumn[22:long](LongColUnaryMinus[21:long]) + vectorized: true Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorization_8.q.out ql/src/test/results/clientpositive/vectorization_8.q.out index 3fa8f53..779a1e7 100644 --- ql/src/test/results/clientpositive/vectorization_8.q.out +++ 
ql/src/test/results/clientpositive/vectorization_8.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -27,7 +27,7 @@ LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -64,18 +68,48 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColLessDoubleScalar[-1:boolean] FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColEqualDoubleScalar[-1:boolean])) + vectorized: true predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[5:double], IdentityExpression[10:boolean], IdentityExpression[6:string], IdentityExpression[4:float], DoubleColUnaryMinus[12:double], DoubleScalarSubtractDoubleColumn[13:double], DoubleColMultiplyDoubleScalar[14:double], DoubleColAddDoubleColumn[16:double](CastLongToFloatViaLongToDouble[15:double]), DoubleColAddDoubleColumn[18:double](DoubleColUnaryMinus[15:double] CastLongToDouble[17:double]), DoubleColUnaryMinus[15:double], DoubleScalarSubtractDoubleColumn[17:double], DoubleColUnaryMinus[19:double], DoubleColAddDoubleColumn[21:double](DoubleScalarSubtractDoubleColumn[20:double] IdentityExpression[22:double](DoubleColAddDoubleColumn[22:double](CastLongToFloatViaLongToDouble[21:double]))) + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 
(type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) @@ -171,7 +205,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL 1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL PREHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -197,7 +231,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: -- double compare timestamp -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -222,6 +256,10 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -234,18 +272,48 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] FilterDoubleColLessEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double]) FilterDoubleColNotEqualDoubleScalar[-1:boolean](CastTimestampToDouble[12:double])) FilterDoubleColLessDoubleScalar[-1:boolean] FilterExprAndExpr[-1:boolean](SelectColumnIsNotNull[-1:boolean] 
FilterDoubleColEqualDoubleScalar[-1:boolean])) + vectorized: true predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[8:timestamp], IdentityExpression[5:double], IdentityExpression[10:boolean], IdentityExpression[6:string], IdentityExpression[4:float], DoubleColUnaryMinus[12:double], DoubleScalarSubtractDoubleColumn[13:double], DoubleColMultiplyDoubleScalar[14:double], DoubleColAddDoubleColumn[16:double](CastLongToFloatViaLongToDouble[15:double]), DoubleColAddDoubleColumn[18:double](DoubleColUnaryMinus[15:double] CastLongToDouble[17:double]), DoubleColUnaryMinus[15:double], DoubleScalarSubtractDoubleColumn[17:double], DoubleColUnaryMinus[19:double], DoubleColAddDoubleColumn[21:double](DoubleScalarSubtractDoubleColumn[20:double] IdentityExpression[22:double](DoubleColAddDoubleColumn[22:double](CastLongToFloatViaLongToDouble[21:double]))) + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: 
double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out index c297239..4b2cb7c 100644 --- ql/src/test/results/clientpositive/vectorization_9.q.out +++ ql/src/test/results/clientpositive/vectorization_9.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN +EXPLAIN VECTORIZATION SELECT cstring1, cdouble, ctimestamp1, @@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,54 +53,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS 
decimal(19,0)) / -1.389) (type: decimal(38,16)), _col4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT cstring1, cdouble, diff --git ql/src/test/results/clientpositive/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/vectorization_decimal_date.q.out index 6cae52c..102d966 100644 --- ql/src/test/results/clientpositive/vectorization_decimal_date.q.out +++ ql/src/test/results/clientpositive/vectorization_decimal_date.q.out @@ -12,10 +12,14 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -23,34 +27,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: date_decimal_test - Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint is not null and cdouble is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdate (type: date), cdecimal (type: decimal(20,10)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vectorization_div0.q.out ql/src/test/results/clientpositive/vectorization_div0.q.out index 63af5c9..f898723 100644 --- ql/src/test/results/clientpositive/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/vectorization_div0.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization detail select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +explain vectorization detail select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,18 +24,42 @@ STAGE PLANS: Select Operator expressions: (cdouble / 0.0) (type: double) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: DoubleColDivideDoubleScalar[12:double] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -150,17 +178,21 @@ NULL PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization detail select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and 
cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +explain vectorization detail select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -173,19 +205,49 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterLongColGreaterLongScalar[-1:boolean] FilterLongColLessLongScalar[-1:boolean]) + vectorized: true predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: LongColSubtractLongScalar[12:long], DoubleColDivideDoubleColumn[15:double](CastLongToDouble[14:double](LongColSubtractLongScalar[13:long])), DecimalScalarDivideDecimalColumn[17:decimal(22,21)](CastLongToDecimal[16:decimal(19,0)](LongColSubtractLongScalar[13:long])) + vectorized: true Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) @@ -320,16 +382,20 @@ POSTHOOK: Input: default@alltypesorc 60330397 NULL 0.000000019890470801974 PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so 
use it as zero -explain +explain vectorization detail select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero -explain +explain vectorization detail select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -342,19 +408,49 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprAndExpr[-1:boolean](FilterDoubleColGreaterEqualDoubleScalar[-1:boolean] FilterDoubleColLessDoubleScalar[-1:boolean]) + vectorized: true predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: DoubleColAddDoubleScalar[12:double], DoubleColDivideDoubleColumn[15:double](CastLongToDouble[13:double] DoubleColAddDoubleScalar[14:double]), DoubleColDivideDoubleColumn[16:double](DoubleColAddDoubleScalar[13:double] DoubleColAddDoubleScalar[14:double]), DoubleScalarDivideDoubleColumn[14:double](DoubleColAddDoubleScalar[13:double]), DoubleScalarDivideDoubleColumn[17:double](DoubleColAddDoubleScalar[13:double]) + vectorized: true Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index eb2a692..e288114 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10,34 +14,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 7 - Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 7 - Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
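
The golden-file updates above and below all follow one pattern: each test's plain EXPLAIN is switched to an EXPLAIN VECTORIZATION form, and a PLAN VECTORIZATION header plus vectorization reporting appears in the expected output. A minimal sketch of the two forms exercised here, using queries taken verbatim from these tests (any other modifier levels are outside what this diff shows):

    -- bare form: expected output keeps only "Execution mode: vectorized"
    -- plus the Map/Reduce Vectorization summary blocks
    explain vectorization
    SELECT cbigint, cdouble FROM alltypesorc
    WHERE cbigint < cdouble and cint > 0 limit 7;

    -- DETAIL: expected output keeps the full operator tree and adds
    -- per-operator blocks (Filter/Select/Reduce Sink Vectorization) with
    -- predicateExpression and selectExpressions lines
    explain vectorization detail
    select ctinyint,cdouble,csmallint from alltypesorc
    where ctinyint is not null order by ctinyint,cdouble limit 20;
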
PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 @@ -57,14 +45,18 @@ POSTHOOK: Input: default@alltypesorc -1887561756 9531.0 PREHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown -explain +explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown -explain +explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -77,19 +69,49 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double], IdentityExpression[1:smallint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) @@ -141,13 +163,17 @@ POSTHOOK: Input: default@alltypesorc -64 -1600.0 -1600 -64 -200.0 -200 PREHOOK: query: -- deduped RS -explain +explain vectorization detail select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- deduped RS 
-explain +explain vectorization detail select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -162,9 +188,24 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], DoubleColAddDoubleScalar[12:double] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(IdentityExpression[12:double]) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: IdentityExpression[0:tinyint] + native: false + nativeConditionsNotMet: Supported IS false + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(IdentityExpression[12:double]) output type STRUCT requires PRIMITIVE IS false + vectorized: true keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 @@ -177,6 +218,18 @@ STAGE PLANS: TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) @@ -230,13 +283,17 @@ NULL 9370.0945309795 -47 -574.6428571428571 -46 3033.55 PREHOOK: query: -- distincts -explain +explain vectorization detail select distinct(ctinyint) from alltypesorc limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- distincts -explain +explain vectorization detail select distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -251,8 +308,21 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 @@ -261,9 +331,27 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID 
UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint) @@ -315,12 +403,16 @@ NULL -48 -47 -46 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -335,9 +427,23 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[5:double]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -346,9 +452,27 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false, No DISTINCT columns IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -402,13 +526,17 @@ NULL 2932 -47 22 -46 24 PREHOOK: query: -- limit zero -explain +explain vectorization detail select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 PREHOOK: type: QUERY POSTHOOK: query: -- limit zero -explain +explain vectorization detail select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -428,13 +556,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### PREHOOK: query: -- 2MR (applied to last RS) -explain +explain vectorization detail select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- 2MR (applied to last RS) -explain +explain vectorization detail select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -448,14 +580,34 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[5:double], IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: IdentityExpression[5:double] + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -464,9 +616,27 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 
(type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -490,6 +660,14 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) diff --git ql/src/test/results/clientpositive/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/vectorization_offset_limit.q.out index 7f41283..774ae35 100644 --- ql/src/test/results/clientpositive/vectorization_offset_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_offset_limit.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10,35 +14,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 2 - Offset of rows: 3 - Statistics: Num rows: 2 Data size: 430 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 430 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 2 - Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 @@ -51,12 +38,16 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1887561756 10361.0 -1887561756 -8881.0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -69,19 +60,49 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint], IdentityExpression[5:double], IdentityExpression[1:smallint] + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: smallint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) diff --git 
ql/src/test/results/clientpositive/vectorization_part_project.q.out ql/src/test/results/clientpositive/vectorization_part_project.q.out index 5463c36..0953c10 100644 --- ql/src/test/results/clientpositive/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/vectorization_part_project.q.out @@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 +POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,41 +61,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (cdouble + 2.0) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2720 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2720 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vectorization_pushdown.q.out 
ql/src/test/results/clientpositive/vectorization_pushdown.q.out index 04780c4..67ad568 100644 --- ql/src/test/results/clientpositive/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/vectorization_pushdown.q.out @@ -1,8 +1,12 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble +POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -10,47 +14,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cbigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
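
Every MR-based plan in these files reports Reduce Vectorization as enabled: false for the same reason: hive.vectorized.execution.reduce.enabled is on, but the engine condition "hive.execution.engine mr IN [tez, spark]" fails. A hedged sketch of the settings the enabled/enableConditions lines refer to; the values are illustrative, since the tests themselves run on the MR engine, which is exactly what keeps reduce-side vectorization off here:

    -- keys named in the condition lines of these expected outputs
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    -- reduce-side vectorization additionally requires Tez or Spark
    SET hive.execution.engine=tez;
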
PREHOOK: query: SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble diff --git ql/src/test/results/clientpositive/vectorization_short_regress.q.out ql/src/test/results/clientpositive/vectorization_short_regress.q.out index a101668..7fb63a1 100644 --- ql/src/test/results/clientpositive/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/vectorization_short_regress.q.out @@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- ArithmeticOps: Add, Multiply, Subtract, Divide -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(cint), (AVG(cint) + -3728), (-((AVG(cint) + -3728))), (-((-((AVG(cint) + -3728))))), @@ -134,6 +136,10 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -141,50 +147,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) - outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT AVG(cint), (AVG(cint) + -3728), @@ -268,7 +234,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -306,7 +273,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Bool, Double, String, Timestamp -- ArithmeticOps: Divide, Multiply, Remainder, Subtract -- FilterOps: LessThan, LessThanOrEqual, GreaterThan, GreaterThanOrEqual, Like, RLike -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT MAX(cint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT MAX(cint), (MAX(cint) / -3728), (MAX(cint) * -3728), VAR_POP(cbigint), @@ -339,6 +307,10 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -346,50 +318,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) - Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: 
double), ctinyint (type: tinyint) - outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint - Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT MAX(cint), (MAX(cint) / -3728), @@ -467,7 +399,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -504,7 +437,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Long, Bool, Double, Timestamp -- ArithmeticOps: Subtract, Remainder, Multiply, Add -- FilterOps: Equal, LessThanOrEqual, GreaterThan, Like, LessThan -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT 
VAR_POP(cbigint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), (VAR_POP(cbigint) - (-(VAR_POP(cbigint)))), COUNT(*), @@ -536,6 +470,10 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -543,50 +481,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) - outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 340 
Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), @@ -663,7 +561,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -690,7 +589,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Timestamp, Long, Double -- ArithmeticOps: Add, Divide, Remainder, Multiply -- FilterOps: LessThanOrEqual, NotEqual, GreaterThanOrEqual, LessThan, Equal -- GroupBy: NoGroupByProjectAggs -EXPLAIN SELECT AVG(ctinyint), +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT AVG(ctinyint), (AVG(ctinyint) + 6981), ((AVG(ctinyint) + 6981) + AVG(ctinyint)), MAX(cbigint), @@ -712,6 +612,10 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -719,50 +623,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) - Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cint, cfloat - Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: 
double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT AVG(ctinyint), @@ -819,7 +683,8 @@ PREHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -860,7 +725,8 @@ POSTHOOK: query: -- TargetTypeClasses: Timestamp, String, Long, Double, Bool -- ArithmeticOps: Multiply, Subtract, Add, Divide -- FilterOps: Like, NotEqual, LessThan, GreaterThanOrEqual, GreaterThan, RLike -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cdouble, ctimestamp2, cstring1, @@ -896,6 +762,10 @@ WHERE (((cstring1 RLIKE 'a.*') ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -903,44 +773,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) - Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) 
(type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(20,18)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(20,18)) - sort order: +++++++++++++++++++++++ - Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(20,18)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 50 Processor Tree: - ListSink PREHOOK: query: SELECT cint, cdouble, @@ -1073,7 +909,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -1113,7 +950,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Bool, Timestamp -- ArithmeticOps: Divide, Remainder, Subtract, Multiply -- FilterOps: Equal, LessThanOrEqual, LessThan, Like, GreaterThanOrEqual, NotEqual, 
GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cbigint, cstring1, cboolean1, @@ -1148,6 +986,10 @@ WHERE (((197 > ctinyint) ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15 LIMIT 25 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1155,44 +997,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) - sort order: +++++++++++++++++++++++++ - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 
(type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 25 - Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 25 Processor Tree: - ListSink PREHOOK: query: SELECT cint, cbigint, @@ -1276,7 +1084,8 @@ PREHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1315,7 +1124,8 @@ POSTHOOK: query: -- TargetTypeClasses: String, Bool, Double, Long, Timestamp -- ArithmeticOps: Add, Subtract, Divide, Multiply, Remainder -- FilterOps: NotEqual, GreaterThanOrEqual, Like, LessThanOrEqual, Equal, GreaterThan -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT cint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cint, cstring1, cboolean2, ctimestamp2, @@ -1349,6 +1159,10 @@ WHERE (((csmallint > -26.28) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13 LIMIT 75 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1356,45 +1170,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) - Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + 
UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(19,18)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(19,18)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) - sort order: +++++++++++++++++++++++ - Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: boolean) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(19,18)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 75 - Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 75 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
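The vectorization_short_regress goldens switch every EXPLAIN to the `EXPLAIN VECTORIZATION ONLY DETAIL` form. As the removed blocks show, the ONLY modifier suppresses the ordinary operator-tree output (TableScan, Filter, Select, Group By, Reduce Output, File Output and their statistics), keeping the PLAN VECTORIZATION header, the stage skeleton, and the vectorization reporting. A minimal sketch of the shape, with a deliberately small query standing in for the large regression queries; the query itself is illustrative and not from the suite, though its columns and predicate reuse names that appear in the queries above:

-- illustrative query (not from the suite); column names and the
-- cbigint <= 197 predicate are taken from the regression queries above
EXPLAIN VECTORIZATION ONLY DETAIL
SELECT MAX(cint), MIN(cdouble)
FROM alltypesorc
WHERE cbigint <= 197;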
PREHOOK: query: SELECT cint, @@ -1549,7 +1328,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1581,7 +1361,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, String, Double, Timestamp -- ArithmeticOps: Divide, Subtract, Multiply, Remainder -- FilterOps: GreaterThan, LessThan, LessThanOrEqual, GreaterThanOrEqual, Like -- GroupBy: NoGroupByProjectColumns -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring2, cdouble, cfloat, @@ -1608,6 +1389,10 @@ WHERE (((-1.389 >= cint) ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 45 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1615,45 +1400,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) - Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) - sort order: +++++++++++++++ - Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: timestamp) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 
(type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 45 - Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 45 Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cstring2, @@ -1763,7 +1513,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1788,7 +1539,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, String, Long -- ArithmeticOps: Remainder, Divide, Subtract -- FilterOps: GreaterThanOrEqual, Equal, LessThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT csmallint, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT csmallint, (csmallint % -75) as c1, STDDEV_SAMP(csmallint) as c2, (-1.389 / csmallint) as c3, @@ -1808,6 +1560,10 @@ GROUP BY csmallint ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 LIMIT 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1816,78 +1572,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) - outputColumnNames: csmallint, cbigint, ctinyint - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() - keys: csmallint (type: smallint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: 
stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) - keys: KEY._col0 (type: smallint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(19,18)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(19,18)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) - sort order: +++++++++++ - Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(19,18)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 20 Processor Tree: - ListSink PREHOOK: query: SELECT csmallint, (csmallint % -75) as c1, @@ -1959,7 +1650,8 @@ PREHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -1991,7 +1683,8 @@ POSTHOOK: query: -- TargetTypeClasses: Long, Double, Timestamp -- ArithmeticOps: Multiply, Add, Subtract, Remainder -- FilterOps: GreaterThan, LessThan, Equal, LessThanOrEqual, GreaterThanOrEqual -- GroupBy: GroupBy -EXPLAIN SELECT cdouble, +EXPLAIN VECTORIZATION 
ONLY DETAIL +SELECT cdouble, VAR_SAMP(cdouble), (2563.58 * VAR_SAMP(cdouble)), (-(VAR_SAMP(cdouble))), @@ -2018,6 +1711,10 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2026,75 +1723,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) - Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), cfloat (type: float) - outputColumnNames: cdouble, cfloat - Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) - keys: cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double) - outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE 
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cdouble, @@ -2160,7 +1795,8 @@ PREHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2220,7 +1856,8 @@ POSTHOOK: query: -- TargetTypeClasses: Bool, Timestamp, String, Double, Long -- ArithmeticOps: Multiply, Subtract, Add, Divide, Remainder -- FilterOps: NotEqual, LessThan, Like, Equal, RLike -- GroupBy: GroupBy -EXPLAIN SELECT ctimestamp1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT ctimestamp1, cstring1, STDDEV_POP(cint) as c1, (STDDEV_POP(cint) * 10.175) as c2, @@ -2275,6 +1912,10 @@ GROUP BY ctimestamp1, cstring1 ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37 LIMIT 50 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2283,78 +1924,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column 
stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) - keys: ctimestamp1 (type: timestamp), cstring1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) 
(type: decimal(20,18)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(20,18)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) - sort order: +++++++++++++++++++++++++++++++++++++++ - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: tinyint), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: double), KEY.reducesinkkey22 (type: double), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey24 (type: double), KEY.reducesinkkey25 (type: double), KEY.reducesinkkey26 (type: double), KEY.reducesinkkey27 (type: tinyint), KEY.reducesinkkey28 (type: double), KEY.reducesinkkey29 (type: double), KEY.reducesinkkey30 (type: double), KEY.reducesinkkey31 (type: double), KEY.reducesinkkey32 (type: decimal(20,18)), KEY.reducesinkkey33 (type: double), KEY.reducesinkkey34 (type: bigint), KEY.reducesinkkey35 (type: double), KEY.reducesinkkey36 (type: bigint), 
KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey38 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 50 Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, cstring1, @@ -2525,7 +2101,8 @@ PREHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2570,7 +2147,8 @@ POSTHOOK: query: -- TargetTypeClasses: Double, Long, String, Timestamp, Bool -- ArithmeticOps: Divide, Subtract, Remainder, Add, Multiply -- FilterOps: GreaterThan, LessThanOrEqual, Equal, LessThan, GreaterThanOrEqual, NotEqual, Like, RLike -- GroupBy: GroupBy -EXPLAIN SELECT cboolean1, +EXPLAIN VECTORIZATION ONLY DETAIL +SELECT cboolean1, MAX(cfloat), (-(MAX(cfloat))), (-26.28 / MAX(cfloat)), @@ -2610,6 +2188,10 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2618,75 +2200,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) - outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint - Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) - keys: cboolean1 (type: boolean) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10 - Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: boolean), _col1 (type: float), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (- _col1) (type: float), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double) - outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: decimal(23,3)), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: float), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col12 (type: float), VALUE._col15 (type: bigint), VALUE._col16 (type: double), VALUE._col17 (type: decimal(24,3)), VALUE._col18 (type: decimal(25,3)), VALUE._col19 (type: double), VALUE._col20 (type: decimal(25,3)), VALUE._col21 (type: double), VALUE._col22 (type: double), VALUE._col23 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 - Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: -1 Processor Tree: - ListSink PREHOOK: query: SELECT cboolean1, MAX(cfloat), @@ -2784,12 +2304,16 @@ create table test_count(i int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_count -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(*) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2802,17 +2326,47 @@ STAGE PLANS: alias: test_count Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2842,12 +2396,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_count #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(i) from test_count PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(i) from test_count POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2862,17 +2420,48 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(i) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2976,12 +2565,16 @@ POSTHOOK: Lineage: alltypesnullorc.cstring2 SIMPLE [(alltypesnull)alltypesnull.F POSTHOOK: Lineage: alltypesnullorc.ctimestamp1 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctimestamp2 SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesnullorc.ctinyint SIMPLE [(alltypesnull)alltypesnull.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain 
vectorization detail select count(*) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(*) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2994,17 +2587,47 @@ STAGE PLANS: alias: alltypesnullorc Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3034,12 +2657,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 12288 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(ctinyint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(ctinyint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3054,17 +2681,48 @@ STAGE PLANS: Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:tinyint] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[0:tinyint]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + 
vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3094,12 +2752,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cint) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cint) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3114,17 +2776,48 @@ STAGE PLANS: Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[2:int] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cint) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[2:int]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3154,12 +2847,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cfloat) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cfloat) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3174,17 +2871,48 @@ STAGE PLANS: Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[4:float] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cfloat) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[4:float]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3214,12 +2942,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cstring1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cstring1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3234,17 +2966,48 @@ STAGE PLANS: Select Operator expressions: cstring1 (type: string) outputColumnNames: cstring1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[6:string] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cstring1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[6:string]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3274,12 +3037,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesnullorc #### A masked pattern was here #### 0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(cboolean1) from alltypesnullorc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(cboolean1) from alltypesnullorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3294,17 +3061,48 @@ STAGE PLANS: Select Operator expressions: cboolean1 (type: boolean) outputColumnNames: cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[10:boolean] + vectorized: true Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cboolean1) + Group By Vectorization: + aggregators: VectorUDAFCount(IdentityExpression[10:boolean]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out index d68f343..6d839f9 100644 --- ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -112,6 +116,12 @@ STAGE PLANS: alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -121,16 +131,40 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -152,12 +186,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,6 +208,12 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -179,16 +223,40 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -215,7 +283,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileforma -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box @@ -223,9 +291,13 @@ POSTHOOK: query: -- RC file does not yet provide 
the vectorized CommonRCFileform -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; -explain +explain vectorization detail select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -238,6 +310,12 @@ STAGE PLANS: alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator @@ -247,16 +325,40 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + SMB Map Join Vectorization: + className: VectorSMBMapJoinOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int], IdentityExpression[1:string], IdentityExpression[2:int], IdentityExpression[3:string] + vectorized: true File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 08c1412..b9c6244 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,20 +50,45 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + 
nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean]) + vectorized: true predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], VectorUDFAdaptor[14:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]), VectorUDFAdaptor[15:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]) + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -109,7 +138,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select csmallint, case @@ -127,7 +156,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -145,6 +174,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -157,20 +190,45 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterExprOrExpr[-1:boolean](FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean] FilterLongColEqualLongScalar[-1:boolean]) + vectorized: true predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) 
THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[1:smallint], VectorUDFAdaptor[14:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]), VectorUDFAdaptor[15:String](LongColEqualLongScalar[12:long] LongColEqualLongScalar[13:long]) + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_casts.q.out ql/src/test/results/clientpositive/vectorized_casts.q.out index 8520125..90b8b05 100644 --- ql/src/test/results/clientpositive/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -3,7 +3,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select -- to boolean cast (ctinyint as boolean) @@ -82,7 +82,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none) -- Test type casting in vectorized mode to verify end-to-end functionality. 
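As a minimal sketch of the EXPLAIN VECTORIZATION statement family these golden files exercise (using the test_count table created earlier in this output; the comments describing what each level prints are inferred from the plan outputs in this diff, not from a syntax reference):

-- Summary sections only: PLAN VECTORIZATION plus the per-stage
-- Map Vectorization / Reduce Vectorization blocks.
EXPLAIN VECTORIZATION
SELECT count(i) FROM test_count;

-- Adds per-operator blocks (Select Vectorization, Group By Vectorization,
-- Reduce Sink Vectorization) with per-expression detail such as
-- selectExpressions and aggregators.
EXPLAIN VECTORIZATION DETAIL
SELECT count(i) FROM test_count;

-- Same annotations, with the non-vectorization parts of the plan suppressed.
EXPLAIN VECTORIZATION ONLY DETAIL
SELECT count(i) FROM test_count;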
-explain +explain vectorization select -- to boolean cast (ctinyint as boolean) @@ -156,6 +156,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -163,31 +167,18 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cbigint % 250) = 0) (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToBoolean(ctinyint) (type: boolean), UDFToBoolean(csmallint) (type: boolean), UDFToBoolean(cint) (type: boolean), UDFToBoolean(cbigint) (type: boolean), UDFToBoolean(cfloat) (type: boolean), UDFToBoolean(cdouble) (type: boolean), cboolean1 (type: boolean), UDFToBoolean((cbigint * 0)) (type: boolean), UDFToBoolean(ctimestamp1) (type: boolean), UDFToBoolean(cstring1) (type: boolean), UDFToInteger(ctinyint) (type: int), UDFToInteger(csmallint) (type: int), cint (type: int), UDFToInteger(cbigint) (type: int), UDFToInteger(cfloat) (type: int), UDFToInteger(cdouble) (type: int), UDFToInteger(cboolean1) (type: int), UDFToInteger(ctimestamp1) (type: int), UDFToInteger(cstring1) (type: int), UDFToInteger(substr(cstring1, 1, 1)) (type: int), UDFToByte(cfloat) (type: tinyint), UDFToShort(cfloat) (type: smallint), UDFToLong(cfloat) (type: bigint), UDFToDouble(ctinyint) (type: double), UDFToDouble(csmallint) (type: double), UDFToDouble(cint) (type: double), UDFToDouble(cbigint) (type: double), UDFToDouble(cfloat) (type: double), cdouble (type: double), UDFToDouble(cboolean1) (type: double), UDFToDouble(ctimestamp1) (type: double), UDFToDouble(cstring1) (type: double), UDFToDouble(substr(cstring1, 1, 1)) (type: double), UDFToFloat(cint) (type: float), UDFToFloat(cdouble) (type: float), CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp), UDFToString(ctinyint) (type: string), UDFToString(csmallint) (type: string), UDFToString(cint) (type: string), UDFToString(cbigint) (type: string), UDFToString(cfloat) (type: string), UDFToString(cdouble) (type: string), UDFToString(cboolean1) (type: string), UDFToString((cbigint * 0)) (type: string), UDFToString(ctimestamp1) (type: string), cstring1 (type: string), UDFToString(CAST( cstring1 AS CHAR(10))) (type: string), UDFToString(CAST( cstring1 AS varchar(10))) (type: string), UDFToFloat(UDFToInteger(cfloat)) (type: float), UDFToDouble((cint * 2)) (type: double), UDFToString(sin(cfloat)) (type: string), (UDFToDouble(UDFToFloat(cint)) + UDFToDouble(cboolean1)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26,
_col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select -- to boolean diff --git ql/src/test/results/clientpositive/vectorized_context.q.out ql/src/test/results/clientpositive/vectorized_context.q.out index 913d07c..508412f 100644 --- ql/src/test/results/clientpositive/vectorized_context.q.out +++ ql/src/test/results/clientpositive/vectorized_context.q.out @@ -82,20 +82,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@household_demographics POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-7 is a root stage Stage-5 depends on stages: Stage-7 @@ -106,10 +110,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:store_sales - Fetch Operator limit: -1 $hdt$_2:household_demographics - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:store_sales @@ -145,56 +147,20 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Map Operator Tree: - TableScan - alias: store - Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int), s_city (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 
0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Stage: Stage-0 - Fetch Operator limit: 100 - Processor Tree: - ListSink PREHOOK: query: select store.s_city, ss_net_profit from store_sales diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 68a2a4d..d5d85d4 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -203,7 +203,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -218,7 +218,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_time), year(fl_time), month(fl_time), @@ -233,6 +233,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -240,28 +244,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(fl_time), @@ -434,7 +428,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -449,7 +443,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(fl_date), year(fl_date), month(fl_date), @@ -464,6 +458,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -471,28 +469,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(fl_date), @@ -665,7 +653,7 @@ POSTHOOK: Input: default@date_udf_flight_orc 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION 
SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -679,7 +667,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT year(fl_time) = year(fl_date), month(fl_time) = month(fl_date), day(fl_time) = day(fl_date), @@ -693,6 +681,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") FROM date_udf_flight_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -700,28 +692,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- Should all be true or NULL SELECT @@ -894,7 +876,7 @@ true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -903,7 +885,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -912,6 +894,10 @@ POSTHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
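
Every updated file gains the same two-line header: enabled, plus the condition list that produced it. A simplified model (not Hive's actual code) of how that header could be derived from a configuration map, matching the format in these golden files:

    import java.util.Map;

    public class PlanVectorizationHeader {
      static String render(Map<String, String> conf) {
        boolean enabled = Boolean.parseBoolean(
            conf.getOrDefault("hive.vectorized.execution.enabled", "false"));
        StringBuilder sb = new StringBuilder("PLAN VECTORIZATION:\n");
        sb.append("  enabled: ").append(enabled).append('\n');
        // The single gating condition is reported either way, bracketed as a list.
        String cond = "hive.vectorized.execution.enabled IS " + enabled;
        sb.append(enabled ? "  enabledConditionsMet: [" : "  enabledConditionsNotMet: [")
          .append(cond).append("]\n");
        return sb.toString();
      }

      public static void main(String[] args) {
        System.out.print(render(Map.of("hive.vectorized.execution.enabled", "true")));
      }
    }
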
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -919,31 +905,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT fl_date, @@ -988,7 +961,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### 2009-07-30 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -996,7 +969,7 @@ PREHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1004,6 +977,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1012,64 +989,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: fl_date (type: date) - outputColumnNames: fl_date - Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(fl_date), max(fl_date), count(fl_date), count() - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: 
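
The Reduce Vectorization blocks above all report enabled: false on MR even with hive.vectorized.execution.reduce.enabled set, because the engine must be tez or spark. A toy reproduction of exactly that report (illustrative only; Hive's real check lives in the vectorizer):

    import java.util.Arrays;
    import java.util.List;

    public class ReduceVectorizationCheck {
      public static void main(String[] args) {
        String engine = "mr";          // hive.execution.engine
        boolean reduceEnabled = true;  // hive.vectorized.execution.reduce.enabled
        List<String> supported = Arrays.asList("tez", "spark");
        boolean engineOk = supported.contains(engine);

        System.out.println("      Reduce Vectorization:");
        System.out.println("          enabled: " + (reduceEnabled && engineOk));
        if (reduceEnabled) {
          System.out.println("          enableConditionsMet: "
              + "hive.vectorized.execution.reduce.enabled IS true");
        }
        if (!engineOk) {
          System.out.println("          enableConditionsNotMet: hive.execution.engine "
              + engine + " IN " + supported + " IS false");
        }
      }
    }
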
_col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: date) - sort order: + - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT min(fl_date) AS c1, diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index f8ae962..727c0d0 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -16,10 +16,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,45 +31,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: dtest - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - 
outputColumnNames: a - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT a), count(DISTINCT a) - bucketGroup: true - keys: a (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY @@ -76,10 +57,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -87,44 +72,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: cint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT cint), count(DISTINCT cint), avg(DISTINCT cint), std(DISTINCT cint) - keys: cint (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reduce Operator 
Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index b414551..eb5aae9 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -20,7 +24,6 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:t1 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:$hdt$_0:t1 @@ -41,60 +44,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13516 Data size: 2906160 Basic 
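
Note that groupByVectorOutput flips to false for the queries above whose aggregates (avg, std) emit struct-typed partial results. One plausible reading, as a hedged toy model rather than Hive's actual rule set:

    import java.util.Arrays;
    import java.util.List;

    public class GroupByVectorOutputModel {
      // Claim vectorized group-by output only when every partial result is a
      // primitive the downstream row-mode reducer can consume directly.
      static boolean groupByVectorOutput(List<String> partialResultTypes) {
        return partialResultTypes.stream().noneMatch(t -> t.startsWith("struct"));
      }

      public static void main(String[] args) {
        System.out.println(groupByVectorOutput(Arrays.asList("bigint", "bigint")));      // true
        System.out.println(groupByVectorOutput(Arrays.asList("double", "struct<...>"))); // false
      }
    }
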
stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 73b61a2..3d0bd14 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -32,12 +32,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@y POSTHOOK: Lineage: y.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select count(1) from x, y where a = b PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select count(1) from x, y where a = b POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -74,11 +78,23 @@ STAGE PLANS: alias: y Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: SelectColumnIsNotNull[-1:boolean] + vectorized: true predicate: b is not null (type: boolean) Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:int] + vectorized: true Statistics: Num rows: 45 Data size: 181 Basic 
stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -86,19 +102,50 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorized: true Statistics: Num rows: 49 Data size: 199 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression[0:long]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/vectorized_math_funcs.q.out index 7b7dedc..ce37284 100644 --- ql/src/test/results/clientpositive/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_math_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization select cdouble ,Round(cdouble, 2) @@ -54,7 +54,7 @@ and sin(cfloat) >= -1.0 PREHOOK: type: QUERY POSTHOOK: query: -- Test math functions in vectorized mode to verify they run correctly end-to-end. 
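
The vectorized_mapjoin2 hunk above is the first to use the detail level, which annotates each operator with className, native, and met/unmet condition lists. A sketch of how such nativeConditionsMet/nativeConditionsNotMet lines could be assembled from named boolean checks — condition names are copied from the golden output, the class itself is illustrative:

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.stream.Collectors;

    public class NativeConditionReport {
      public static void main(String[] args) {
        // Insertion order is preserved so the report reads like the plan text.
        Map<String, Boolean> checks = new LinkedHashMap<>();
        checks.put("hive.vectorized.execution.mapjoin.native.enabled", true);
        checks.put("One MapJoin Condition", true);
        checks.put("hive.execution.engine mr IN [tez, spark]", false);

        String met = checks.entrySet().stream().filter(Map.Entry::getValue)
            .map(e -> e.getKey() + " IS true").collect(Collectors.joining(", "));
        String notMet = checks.entrySet().stream().filter(e -> !e.getValue())
            .map(e -> e.getKey() + " IS false").collect(Collectors.joining(", "));

        // native is only claimed when no condition failed.
        System.out.println("  native: " + notMet.isEmpty());
        System.out.println("  nativeConditionsMet: " + met);
        System.out.println("  nativeConditionsNotMet: " + notMet);
      }
    }
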
-explain +explain vectorization select cdouble ,Round(cdouble, 2) @@ -106,6 +106,10 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -113,31 +117,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select cdouble diff --git ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out index ab22912..7024826 100644 --- ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out +++ 
ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out @@ -1,11 +1,15 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 @@ -16,10 +20,8 @@ STAGE PLANS: Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:v2 - Fetch Operator limit: -1 $hdt$_1:v3 - Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:$hdt$_0:v2 @@ -55,68 +57,24 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - alias: v1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: smallint), _col3 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vectorized_parquet.q.out ql/src/test/results/clientpositive/vectorized_parquet.q.out index 6cba9f1..a05eeca 100644 --- ql/src/test/results/clientpositive/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet.q.out @@ -46,16 +46,20 @@ POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldS POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -PREHOOK: query: explain select * +PREHOOK: query: explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * +POSTHOOK: query: explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -63,33 +67,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypes_parquet - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Stage: Stage-0 - Fetch Operator limit: 10 - Processor Tree: - ListSink 
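
The Parquet hunks above show the other side of the map gate: ORC scans pass via hive.vectorized.use.vectorized.input.format, while MapredParquetInputFormat in this version falls through to the row-serde check, which is off by default, so Map Vectorization reports enabled: false. A simplified model of that decision, under the assumption that only ORC is in the vectorized-input-format set:

    import java.util.Set;

    public class MapVectorizationGate {
      static final Set<String> VECTORIZED_INPUT_FORMATS =
          Set.of("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");

      static String decide(String inputFormat, boolean useVectorizedInputFormat,
          boolean useRowSerdeDeserialize) {
        if (useVectorizedInputFormat && VECTORIZED_INPUT_FORMATS.contains(inputFormat)) {
          return "enabled: true (hive.vectorized.use.vectorized.input.format IS true)";
        }
        if (useRowSerdeDeserialize) {
          return "enabled: true (hive.vectorized.use.row.serde.deserialize IS true)";
        }
        return "enabled: false (hive.vectorized.use.row.serde.deserialize IS false)";
      }

      public static void main(String[] args) {
        System.out.println(decide(
            "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", true, false));
      }
    }
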
PREHOOK: query: select * from alltypes_parquet @@ -115,7 +99,7 @@ POSTHOOK: Input: default@alltypes_parquet 528534767 27 -7824 27.0 -7824.0 cvLH6Eat2yFsyy7p 528534767 -11 -15431 -11.0 -15431.0 cvLH6Eat2yFsyy7p 528534767 61 -15549 61.0 -15549.0 cvLH6Eat2yFsyy7p -PREHOOK: query: explain select ctinyint, +PREHOOK: query: explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), @@ -124,7 +108,7 @@ PREHOOK: query: explain select ctinyint, from alltypes_parquet group by ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain select ctinyint, +POSTHOOK: query: explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), @@ -133,6 +117,10 @@ POSTHOOK: query: explain select ctinyint, from alltypes_parquet group by ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -140,46 +128,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypes_parquet - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select ctinyint, max(cint), diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out 
index 281fe93..ba5b1b1 100644 --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -118,15 +118,19 @@ POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] PREHOOK: query: -- select -explain +explain vectorization detail SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types PREHOOK: type: QUERY POSTHOOK: query: -- select -explain +explain vectorization detail SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -149,6 +153,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Stage: Stage-0 Fetch Operator @@ -188,12 +196,16 @@ POSTHOOK: Input: default@parquet_types 119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 12.83 120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD 73.04 121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde 90.33 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -216,6 +228,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Stage: Stage-0 Fetch Operator @@ -253,7 +269,7 @@ uvwzy 5 abcdede 7 4.76 1 vwxyz 5 abcdede 7 12.83 1 wxyza 5 abcde 5 73.04 1 bcdef 5 abcde 5 90.33 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail SELECT ctinyint, MAX(cint), MIN(csmallint), @@ -265,7 +281,7 @@ FROM parquet_types GROUP BY ctinyint ORDER BY ctinyint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail SELECT ctinyint, MAX(cint), MIN(csmallint), @@ -277,6 +293,10 @@ FROM parquet_types GROUP BY ctinyint ORDER BY ctinyint POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
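
All of the conditions named in these blocks are ordinary Hive properties, so they can be flipped programmatically when reproducing a plan. A hedged sketch using HiveConf (which extends Hadoop's Configuration, so plain string keys work); the property names are taken verbatim from the golden output:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class VectorizationSwitches {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.set("hive.vectorized.execution.enabled", "true");
        conf.set("hive.vectorized.execution.reduce.enabled", "true");
        // Off by default in this version; enabling it would let row-serde tables
        // (e.g. the Parquet cases above) enter the vectorized map path.
        conf.set("hive.vectorized.use.row.serde.deserialize", "true");
        System.out.println(conf.get("hive.vectorized.use.row.serde.deserialize"));
      }
    }
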
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -305,6 +325,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) @@ -328,6 +356,14 @@ STAGE PLANS: sort order: + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: decimal(4,2)) diff --git ql/src/test/results/clientpositive/vectorized_ptf.q.out ql/src/test/results/clientpositive/vectorized_ptf.q.out index 7fb966a..3c09f97 100644 --- ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchem POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] PREHOOK: query: --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -132,7 +132,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: --1. 
test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -142,6 +142,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -150,21 +154,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -215,57 +213,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + 
Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -292,77 +250,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -414,7 +308,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. 
testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -423,13 +317,17 @@ sort by j.p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -439,40 +337,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - auto parallelism: false - TableScan - alias: p2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -523,47 +390,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [j:p1, j:p2] Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, 
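
The self-join stage above is rejected with "Vectorized map work only works with 1 TableScanOperator IS false": its map work scans part_orc twice (p1 and p2). A toy model of that rule, emitting the same condition string — illustrative, not Hive source:

    import java.util.Arrays;
    import java.util.List;

    public class SingleTableScanRule {
      static String check(List<String> tableScanAliases) {
        if (tableScanAliases.size() == 1) {
          return "vectorizable map work (1 TableScanOperator)";
        }
        return "enabledConditionsNotMet: Vectorized map work only works with 1 "
            + "TableScanOperator IS false";
      }

      public static void main(String[] args) {
        System.out.println(check(Arrays.asList("p1", "p2")));
      }
    }
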
spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -590,57 +427,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: j - output shape: _col1: string, _col2: string, _col5: int - type: SUBQUERY - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -667,64 +464,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - 
name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: lag_window_0 - arguments: _col5, 1, _col5 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz @@ -772,7 +518,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -780,12 +526,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 3. 
testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -793,21 +543,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -858,57 +602,13 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size from noop(on part_orc @@ -952,7 +652,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 
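Worth noting about every Reduce Vectorization summary in this file: hive.vectorized.execution.reduce.enabled is already satisfied (it appears under enableConditionsMet), and the only blocker is the execution engine, since reduce-side vectorization requires Tez or Spark while these plans run on classic MapReduce (hive.execution.engine mr IN [tez, spark] IS false). A sketch, assuming a Tez installation is available, of how the reduce side could be exercised; only the two set statements come from the conditions printed above, the rest is illustrative:

set hive.execution.engine=tez;
set hive.vectorized.execution.reduce.enabled=true;
explain vectorization extended
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name);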
Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -964,7 +664,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -974,6 +674,10 @@ from noop(on part_orc order by p_name ) abc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -982,21 +686,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1047,57 +745,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: abc - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - 
TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1124,77 +782,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over 
(partition by p_mfgr order by p_name) as r, @@ -1246,7 +840,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1258,7 +852,7 @@ from noop(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1268,6 +862,10 @@ from noop(on part_orc order by p_name ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1276,21 +874,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1341,57 +933,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1418,78 +970,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: lag_window_2 - arguments: _col5, 1, _col5 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - 
Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -1541,7 +1028,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1554,7 +1041,7 @@ group by p_mfgr, p_name, p_size PREHOOK: type: QUERY POSTHOOK: query: -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1565,6 +1052,10 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1573,21 +1064,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1638,65 +1123,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col2, _col1, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
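A brief gloss on the Map Vectorization fields that replace the old operator trees throughout this file: enabled and enabledConditionsMet echo the validation checks, inputFileFormats lists what the stage reads, and the remaining flags are roughly self-describing, with groupByVectorOutput indicating whether vectorized GROUP BY emits vectorized output, allNative whether every operator has a native vectorized implementation, and usesVectorUDFAdaptor whether any expression falls back to the row-mode UDF adaptor. When the full per-operator detail is not needed, lower-verbosity forms should be available; the SUMMARY/OPERATOR modifiers and the ONLY keyword below are assumed from the EXPLAIN VECTORIZATION syntax this patch introduces and do not appear elsewhere in this file:

-- hypothetical lower-verbosity variants of the explains above
explain vectorization summary
select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name);
explain vectorization only operator
select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name);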
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa - sort order: +++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1723,79 +1160,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: lag_window_2 - arguments: _col2, 1, _col2 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - 
hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -1848,7 +1219,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -1857,13 +1228,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1872,21 +1247,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1937,77 +1306,16 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string - type: TABLE - Partition table definition - input alias: abc - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col0: int, _col1: string, _col2: string, _col3: string, 
_col4: string, _col5: int, _col6: string, _col7: double, _col8: string - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2079,43 +1387,13 @@ STAGE PLANS: /part_orc [p1] #### A masked pattern was here #### Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - 
hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select abc.* from noop(on part_orc @@ -2163,7 +1441,7 @@ POSTHOOK: Input: default@part_orc 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -2172,13 +1450,17 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -2187,21 +1469,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2252,77 +1528,16 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string - type: TABLE - Partition table definition - input alias: abc - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col0: int, _col1: string, _col2: string, _col3: string, 
_col4: string, _col5: int, _col6: string, _col7: double, _col8: string - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2394,47 +1609,13 @@ STAGE PLANS: /part_orc [p1] #### A masked pattern was here #### Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 p_partkey (type: int) - 1 _col0 (type: int) - outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: 
NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select abc.* from part_orc p1 join noop(on part_orc @@ -2482,7 +1663,7 @@ POSTHOOK: Input: default@part_orc 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -2491,13 +1672,17 @@ order by p_name, p_size desc) PREHOOK: type: QUERY POSTHOOK: query: -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2506,35 +1691,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: p_name: string, p_mfgr: string, p_size: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: p_name ASC NULLS FIRST, p_size DESC NULLS LAST - output shape: p_name: string, p_mfgr: string, p_size: int - partition by: p_mfgr - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) - null sort order: aaz - sort order: ++- - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Unknown + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2585,57 +1747,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - null sort order: aaz - sort order: ++- - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2662,64 +1784,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1, _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 
Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r @@ -2765,7 +1836,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2776,7 +1847,7 @@ from noopwithmap(on part_orc PREHOOK: type: QUERY POSTHOOK: query: -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2785,6 +1856,10 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2793,36 +1868,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: p_name: string, p_mfgr: string, p_size: int, p_retailprice: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: p_name ASC NULLS FIRST - output shape: p_name: string, p_mfgr: string, p_size: int, p_retailprice: double - partition by: p_mfgr - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Unknown + vectorized: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2873,58 +1924,17 @@ STAGE PLANS: 
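One more pattern visible in tests 9 and 10: with noopwithmap the PTF runs a map-side pass (the removed trees show Map-side function: true), and the map work then reports enabled: true but vectorized: false with notVectorizedReason: Unknown, that is, the ORC input qualifies yet the stage itself does not vectorize. Contrast this with the plain noop tests, whose map stages show Execution mode: vectorized. A sketch of the pair of explains one might compare to isolate the effect; both queries appear verbatim earlier in this file:

explain vectorization extended
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc);
explain vectorization extended
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name);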
Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2951,77 +1961,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function 
definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -3071,7 +2017,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3082,7 +2028,7 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 11. 
testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3091,6 +2037,10 @@ from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3099,21 +2049,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3164,57 +2108,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), 
_col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3241,77 +2145,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -3361,7 +2201,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. 
testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3373,7 +2213,7 @@ order by p_mfgr, p_name PREHOOK: type: QUERY POSTHOOK: query: -- 12. testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3383,6 +2223,10 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3392,21 +2236,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3457,80 +2295,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - File Output Operator - compressed: false - GlobalTableId: 0 -#### A 
masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3557,65 +2332,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - 
sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3642,77 +2369,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, @@ -3764,7 +2427,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3779,7 +2442,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 PREHOOK: type: QUERY POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3792,6 +2455,10 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3800,21 +2467,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3865,57 +2526,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [sub1:part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map 
Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3942,69 +2563,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: count_window_0 - arguments: _col5 - name: count - window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~ - window function definition - alias: sum_window_1 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~FOLLOWING(2) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:bigint:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, sub1.cd, sub1.s1 @@ -4062,7 +2627,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. 
testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -4076,7 +2641,7 @@ order by p_name PREHOOK: type: QUERY POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -4088,6 +2653,10 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4097,21 +2666,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4162,77 +2725,16 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: abc - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col7 - columns.types int,string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe 
- TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4304,47 +2806,17 @@ STAGE PLANS: /part_orc [p1] #### A masked pattern was here #### Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4371,90 +2843,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: count_window_2 - arguments: _col1 - name: count - window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~ - window function definition - alias: sum_window_3 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - window function definition - alias: lag_window_4 - arguments: _col5, 1, _col5 - name: lag - window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types string:string:int:int:bigint:double:double:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, @@ -4512,7 +2907,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. 
testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -4520,12 +2915,16 @@ order by p_name) PREHOOK: type: QUERY POSTHOOK: query: -- 15. testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4534,21 +2933,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4599,65 +2992,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col2, _col1, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4684,40 +3029,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc @@ -4778,7 +3096,7 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part_orc POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -4786,7 +3104,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -4794,6 +3112,10 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4802,31 +3124,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false 
- Select Operator - expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) - outputColumnNames: p_mfgr, p_brand, p_retailprice - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(p_retailprice) - keys: p_mfgr (type: string), p_brand (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4877,59 +3183,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [mfgr_price_view:part_orc] Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: mfgr_price_view - output shape: _col0: string, _col1: string, _col2: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col0 ASC NULLS FIRST - output shape: _col0: string, _col1: string, _col2: double - partition by: _col0 - raw input shape: - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: double) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4956,63 +3220,13 @@ STAGE PLANS: Truncated Path -> 
Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~ - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:double:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 @@ -5096,7 +3310,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain extended +PREHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -5112,7 +3326,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -5128,6 +3342,10 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -5141,21 +3359,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - 
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5206,74 +3418,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int, _col7: double - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + Map Vectorization: + 
enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5300,80 +3455,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 Move Operator @@ -5408,18 +3493,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 
(type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5446,67 +3523,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(5)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, sum_window_0 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,sum_window_0 - columns.types string,string,int,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-6 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: sum_window_0 (type: bigint), _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5533,87 +3560,10 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col0, _col2, _col3, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: 
COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: bigint, _col2: string, _col3: string, _col6: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST - partition by: _col3 - raw input shape: - window functions: - window function definition - alias: rank_window_1 - arguments: _col3, _col2 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_2 - arguments: _col3, _col2 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: cume_dist_window_3 - arguments: _col3, _col2 - name: cume_dist - window function: GenericUDAFCumeDistEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: first_value_window_4 - arguments: _col6, true - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(2)~FOLLOWING(2) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-1 Move Operator @@ -5766,7 +3716,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5785,7 +3735,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 18. 
testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5802,6 +3752,10 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -5811,21 +3765,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_name (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5876,87 +3824,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types 
string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5983,65 +3861,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6068,77 +3898,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col2, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col2, _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col2, _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, @@ -6204,7 +3970,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6223,7 +3989,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6240,6 +4006,10 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -6250,21 +4020,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_name (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6315,64 +4079,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce 
- Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6399,57 +4116,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6476,57 +4153,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input 
alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6553,77 +4190,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 
Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, @@ -6689,7 +4262,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6706,7 +4279,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6721,6 +4294,10 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -6730,21 +4307,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6795,64 +4366,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table 
definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6879,64 +4403,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce 
Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6963,77 +4440,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr 
IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, @@ -7095,7 +4508,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -7114,7 +4527,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -7131,6 +4544,10 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -7141,21 +4558,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7206,64 +4617,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - 
columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7290,73 +4654,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 
16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7383,58 +4691,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-4 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7461,77 +4728,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col2, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - 
arguments: _col2, _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col2, _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, @@ -7597,7 +4800,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -7615,7 +4818,7 @@ from noop(on PREHOOK: type: QUERY POSTHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -7631,6 +4834,10 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -7640,21 +4847,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7705,80 +4906,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7805,65 +4943,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - transforms raw input: true - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -7890,77 +4980,13 @@ STAGE PLANS: Truncated Path -> 
Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col2, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col2, _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col2, _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, @@ -8024,7 +5050,7 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -8040,7 +5066,7 @@ from noopwithmap(on PREHOOK: type: QUERY POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -8054,6 +5080,10 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -8063,21 +5093,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -8128,80 +5152,17 @@ STAGE PLANS: Truncated Path -> Alias: /part_orc [part_orc] Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: part_orc - output shape: _col1: string, _col2: string, _col5: int - type: TABLE - Partition table definition - input alias: ptf_1 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Partition table definition - input alias: ptf_2 - name: noop - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Map-side function: true - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -8228,58 +5189,17 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: PTFCOMPONENT - Partition table definition - input alias: ptf_1 - name: noopwithmap - order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST - output shape: _col1: string, _col2: string, _col5: int - partition by: _col2, _col1 - raw input shape: - transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -8306,77 +5226,13 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: dense_rank_window_1 - arguments: _col1 - name: dense_rank - window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_2 - arguments: _col5 - name: sum - window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~ - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out index cd05fd8..5a55544 100644 --- ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out @@ -1,15 +1,19 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT PREHOOK: type: QUERY POSTHOOK: query: -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION SELECT COUNT(t1.cint) AS CNT, 
MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -19,110 +23,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: t2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-2 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic
stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-3 Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint) AS CNT, MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 diff --git ql/src/test/results/clientpositive/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/vectorized_string_funcs.q.out index ca938b0..d586683 100644 --- ql/src/test/results/clientpositive/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_string_funcs.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -24,7 +24,7 @@ and cstring1 like '%' PREHOOK: type: QUERY POSTHOOK: query: -- Test string functions in vectorized mode to verify end-to-end functionality. 
-explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) @@ -46,6 +46,10 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -53,31 +57,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: select substr(cstring1, 1, 2) diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out index d5d93ed..c2948f6 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out @@ -19,12 +19,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@test POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -64,12 +68,16 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) 
FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,12 +135,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -147,15 +159,34 @@ STAGE PLANS: Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -173,12 +204,16 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -193,17 +228,48 @@ STAGE PLANS: Select Operator expressions: ts (type: timestamp) outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: IdentityExpression[0:timestamp] + vectorized: true Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ts), max(ts) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(IdentityExpression[0:timestamp]), VectorUDAFMaxTimestamp(IdentityExpression[0:timestamp]) + className: VectorGroupByOperator + vectorOutput: true + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 80 
Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + vectorized: true Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1) diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 58af3e2..71cf432 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_wrong POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -106,38 +110,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution 
mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(ctimestamp1) AS c1, @@ -209,7 +197,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -222,7 +210,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -235,6 +223,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -242,38 +234,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: 
int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(stimestamp1) AS c1, @@ -345,7 +321,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -358,7 +334,7 @@ PREHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -371,6 +347,10 @@ POSTHOOK: query: EXPLAIN SELECT FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -378,38 +358,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: 
boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: -- Should all be true or NULL SELECT @@ -484,7 +448,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -498,7 +462,7 @@ FROM alltypesorc_wrong ORDER BY c1 PREHOOK: type: QUERY POSTHOOK: query: -- Wrong format. Should all be NULL. 
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -511,6 +475,10 @@ EXPLAIN SELECT FROM alltypesorc_wrong ORDER BY c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -518,38 +486,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_wrong - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT to_unix_timestamp(stimestamp1) AS c1, @@ -584,20 +536,24 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -605,43 +561,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT min(ctimestamp1), @@ -663,15 +598,19 @@ POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL 0 40 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... 
-EXPLAIN SELECT +EXPLAIN VECTORIZATION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -679,46 +618,19 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(ctimestamp1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: round(_col0, 3) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggregation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP" (UDAF evaluator mode = PARTIAL1) + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT round(sum(ctimestamp1), 3) @@ -733,7 +645,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -744,7 +656,7 @@ PREHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -755,6 +667,10 @@ POSTHOOK: query: EXPLAIN SELECT round(stddev_samp(ctimestamp1), 3) FROM alltypesorc_string POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -762,47 +678,22 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<count:bigint,sum:double,input:timestamp>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Stage: Stage-0 - Fetch Operator limit: -1 - Processor Tree: - ListSink PREHOOK: query: SELECT round(avg(ctimestamp1), 0), diff --git ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out index 31afb2c..6d216b1 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select -- to
timestamp cast (ctinyint as timestamp) @@ -16,7 +16,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -34,6 +34,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,20 +50,45 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColEqualLongScalar[-1:boolean](LongColModuloLongScalar[12:long]) + vectorized: true predicate: ((cbigint % 250) = 0) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastMillisecondsLongToTimestamp[14:timestamp], CastMillisecondsLongToTimestamp[16:timestamp], CastMillisecondsLongToTimestamp[18:timestamp], CastMillisecondsLongToTimestamp[20:timestamp], CastDoubleToTimestamp[21:timestamp], CastDoubleToTimestamp[22:timestamp], CastMillisecondsLongToTimestamp[24:timestamp], CastMillisecondsLongToTimestamp[26:timestamp](LongColMultiplyLongScalar[12:long]), IdentityExpression[8:timestamp], VectorUDFAdaptor[27:Timestamp], VectorUDFAdaptor[29:Timestamp](StringSubstrColStartLen[28:string]) + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -131,7 +160,7 @@ POSTHOOK: Input: default@alltypesorc 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 
1969-12-31 15:59:54.733 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL 1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -149,7 +178,7 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select -- to timestamp cast (ctinyint as timestamp) @@ -167,6 +196,10 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -179,20 +212,45 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + nativeConditionsMet: Supported IS true + predicateExpression: FilterLongColEqualLongScalar[-1:boolean](LongColModuloLongScalar[12:long]) + vectorized: true predicate: ((cbigint % 250) = 0) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + nativeConditionsMet: Supported IS true + selectExpressions: CastLongToTimestamp[13:timestamp], CastLongToTimestamp[14:timestamp], CastLongToTimestamp[15:timestamp], CastLongToTimestamp[16:timestamp], CastDoubleToTimestamp[17:timestamp], CastDoubleToTimestamp[18:timestamp], CastLongToTimestamp[19:timestamp], CastLongToTimestamp[20:timestamp](LongColMultiplyLongScalar[12:long]), IdentityExpression[8:timestamp], VectorUDFAdaptor[21:Timestamp], VectorUDFAdaptor[23:Timestamp](StringSubstrColStartLen[22:string]) + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + nativeConditionsNotMet: Supported IS false + vectorized: true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator
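
Reviewer note (editorial, not part of the patch): every golden-file change above follows the same pattern. The EXPLAIN statements in the .q files gain a VECTORIZATION keyword (plain, EXTENDED, or DETAIL), and the expected output replaces the full Map/Reduce operator trees with a PLAN VECTORIZATION header plus per-stage Map Vectorization and Reduce Vectorization summaries. At the DETAIL level, individual operators are additionally annotated (Select Vectorization, Filter Vectorization, Group By Vectorization, Reduce Sink Vectorization, File Sink Vectorization) with their className, native flag, and the conditions met or not met, and a notVectorizedReason line is printed when an operator cannot be vectorized. A minimal sketch of how the new syntax is exercised follows; the specific queries are illustrative only, though the settings, table, and columns all appear in the tests above:

    -- Both switches show up in the report as enabled/enable conditions.
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;

    -- Summary form: prints PLAN VECTORIZATION plus per-stage
    -- Map Vectorization / Reduce Vectorization sections.
    EXPLAIN VECTORIZATION
    SELECT COUNT(cint) FROM alltypesorc;

    -- DETAIL form: also annotates each operator with className, native,
    -- nativeConditionsMet/NotMet, and any notVectorizedReason.
    EXPLAIN VECTORIZATION DETAIL
    SELECT MIN(ctimestamp1), MAX(ctimestamp1) FROM alltypesorc;

Note that on the MR engine the reduce side stays non-vectorized regardless of these settings; the summaries above consistently report "hive.execution.engine mr IN [tez, spark] IS false", so reduce-side vectorization reports only become meaningful under Tez or Spark.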